1 | /* cmp -- compare two files. |
---|
2 | Copyright (C) 1990, 1991, 1992, 1993, 1994 Free Software Foundation, Inc. |
---|
3 | |
---|
4 | This program is free software; you can redistribute it and/or modify |
---|
5 | it under the terms of the GNU General Public License as published by |
---|
6 | the Free Software Foundation; either version 2, or (at your option) |
---|
7 | any later version. |
---|
8 | |
---|
9 | This program is distributed in the hope that it will be useful, |
---|
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
12 | GNU General Public License for more details. |
---|
13 | |
---|
14 | You should have received a copy of the GNU General Public License |
---|
15 | along with this program; if not, write to the Free Software |
---|
16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ |
---|
17 | |
---|
18 | /* Written by Torbjorn Granlund and David MacKenzie. */ |
---|
19 | |
---|
20 | #include "system.h" |
---|
21 | #include <stdio.h> |
---|
22 | #include "getopt.h" |
---|
23 | #include "cmpbuf.h" |
---|
24 | |
---|
25 | extern char const version_string[]; |
---|
26 | |
---|
27 | #if __STDC__ && defined (HAVE_VPRINTF) |
---|
28 | void error (int, int, char const *, ...); |
---|
29 | #else |
---|
30 | void error (); |
---|
31 | #endif |
---|
32 | VOID *xmalloc PARAMS((size_t)); |
---|
33 | |
---|
34 | static int cmp PARAMS((void)); |
---|
35 | static off_t file_position PARAMS((int)); |
---|
36 | static size_t block_compare PARAMS((char const *, char const *)); |
---|
37 | static size_t block_compare_and_count PARAMS((char const *, char const *, long *)); |
---|
38 | static size_t block_read PARAMS((int, char *, size_t)); |
---|
39 | static void printc PARAMS((int, unsigned)); |
---|
40 | static void try_help PARAMS((char const *)); |
---|
41 | static void check_stdout PARAMS((void)); |
---|
42 | static void usage PARAMS((void)); |
---|
43 | |
---|
/* The name this program was run as; set from argv[0] in `main'.  */
char const *program_name;

/* Names of the two files being compared.  */
static char const *file[2];

/* Open descriptors for the two files, indexed like `file'.  */
static int file_desc[2];

/* One read buffer per file.  */
static char *buffer[2];

/* Block size used when reading the files.  */
static size_t buf_size;

/* Number of leading bytes of each file to skip before comparing.  */
static off_t ignore_initial;

/* How differences are reported.  Because this object has static
   storage and `type_first_diff' is the first enumerator, the default
   (when no option selects another mode) is `type_first_diff'.  */
static enum
{
  /* Print the offset and line number of the first differing bytes.  */
  type_first_diff,

  /* Print the (decimal) offsets and (octal) values of all differing
     bytes.  */
  type_all_diffs,

  /* Print nothing; only the exit status says whether the files
     differ.  */
  type_status
} comparison_type;

/* Integer type used to compare several bytes in one operation.
   May be overridden by defining `word' before this point.  */
#ifndef word
#define word int
#endif

/* Nonzero means show differing byte values quoted the way
   `cat -t' shows them.  */
static int opt_print_chars;
---|
81 | |
---|
/* Long option table handed to getopt_long.  Each row is
   { name, has_arg, flag, val }.  Every flag field is 0, so
   getopt_long returns VAL for a matched option.  `--help' has no
   short equivalent and is identified by the value 129.  The all-zero
   row terminates the table.  */
static struct option const long_options[] =
{
  /* name              has_arg  flag  val  */
  {"print-chars",      0,       0,    'c'},
  {"ignore-initial",   1,       0,    'i'},
  {"verbose",          0,       0,    'l'},
  {"silent",           0,       0,    's'},
  {"quiet",            0,       0,    's'},
  {"version",          0,       0,    'v'},
  {"help",             0,       0,    129},
  {0,                  0,       0,    0}
};
---|
93 | |
---|
94 | static void |
---|
95 | try_help (reason) |
---|
96 | char const *reason; |
---|
97 | { |
---|
98 | if (reason) |
---|
99 | error (0, 0, "%s", reason); |
---|
100 | error (2, 0, "Try `%s --help' for more information.", program_name); |
---|
101 | } |
---|
102 | |
---|
/* Verify that everything written to stdout got out.
   If the stream already carries an error indicator, report
   "write error" through `error' with status 2 and no errno value;
   stdout is not closed in that case.  Otherwise close stdout, and if
   the close fails report "write error" with the current errno.  */

static void
check_stdout ()
{
  if (ferror (stdout))
    {
      error (2, 0, "write error");
      return;
    }

  if (fclose (stdout) != 0)
    {
      error (2, errno, "write error");
    }
}
---|
111 | |
---|
112 | static void |
---|
113 | usage () |
---|
114 | { |
---|
115 | printf ("Usage: %s [OPTION]... FILE1 [FILE2]\n", program_name); |
---|
116 | printf ("%s", "\ |
---|
117 | -c --print-chars Output differing bytes as characters.\n\ |
---|
118 | -i N --ignore-initial=N Ignore differences in the first N bytes of input.\n\ |
---|
119 | -l --verbose Output offsets and codes of all differing bytes.\n\ |
---|
120 | -s --quiet --silent Output nothing; yield exit status only.\n\ |
---|
121 | -v --version Output version info.\n\ |
---|
122 | --help Output this help.\n"); |
---|
123 | printf ("If a FILE is `-' or missing, read standard input.\n"); |
---|
124 | } |
---|
125 | |
---|
126 | int |
---|
127 | main (argc, argv) |
---|
128 | int argc; |
---|
129 | char *argv[]; |
---|
130 | { |
---|
131 | int c, i, exit_status; |
---|
132 | struct stat stat_buf[2]; |
---|
133 | |
---|
134 | initialize_main (&argc, &argv); |
---|
135 | program_name = argv[0]; |
---|
136 | |
---|
137 | /* Parse command line options. */ |
---|
138 | |
---|
139 | while ((c = getopt_long (argc, argv, "ci:lsv", long_options, 0)) |
---|
140 | != EOF) |
---|
141 | switch (c) |
---|
142 | { |
---|
143 | case 'c': |
---|
144 | opt_print_chars = 1; |
---|
145 | break; |
---|
146 | |
---|
147 | case 'i': |
---|
148 | ignore_initial = 0; |
---|
149 | while (*optarg) |
---|
150 | { |
---|
151 | /* Don't use `atol', because `off_t' may be longer than `long'. */ |
---|
152 | unsigned digit = *optarg++ - '0'; |
---|
153 | if (9 < digit) |
---|
154 | try_help ("non-digit in --ignore-initial value"); |
---|
155 | ignore_initial = 10 * ignore_initial + digit; |
---|
156 | } |
---|
157 | break; |
---|
158 | |
---|
159 | case 'l': |
---|
160 | comparison_type = type_all_diffs; |
---|
161 | break; |
---|
162 | |
---|
163 | case 's': |
---|
164 | comparison_type = type_status; |
---|
165 | break; |
---|
166 | |
---|
167 | case 'v': |
---|
168 | printf ("cmp - GNU diffutils version %s\n", version_string); |
---|
169 | exit (0); |
---|
170 | |
---|
171 | case 129: |
---|
172 | usage (); |
---|
173 | check_stdout (); |
---|
174 | exit (0); |
---|
175 | |
---|
176 | default: |
---|
177 | try_help (0); |
---|
178 | } |
---|
179 | |
---|
180 | if (optind == argc) |
---|
181 | try_help ("missing operand"); |
---|
182 | |
---|
183 | file[0] = argv[optind++]; |
---|
184 | file[1] = optind < argc ? argv[optind++] : "-"; |
---|
185 | |
---|
186 | if (optind < argc) |
---|
187 | try_help ("extra operands"); |
---|
188 | |
---|
189 | for (i = 0; i < 2; i++) |
---|
190 | { |
---|
191 | /* If file[1] is "-", treat it first; this avoids a misdiagnostic if |
---|
192 | stdin is closed and opening file[0] yields file descriptor 0. */ |
---|
193 | int i1 = i ^ (strcmp (file[1], "-") == 0); |
---|
194 | |
---|
195 | /* Two files with the same name are identical. |
---|
196 | But wait until we open the file once, for proper diagnostics. */ |
---|
197 | if (i && filename_cmp (file[0], file[1]) == 0) |
---|
198 | exit (0); |
---|
199 | |
---|
200 | file_desc[i1] = (strcmp (file[i1], "-") == 0 |
---|
201 | ? STDIN_FILENO |
---|
202 | : open (file[i1], O_RDONLY, 0)); |
---|
203 | if (file_desc[i1] < 0 || fstat (file_desc[i1], &stat_buf[i1]) != 0) |
---|
204 | { |
---|
205 | if (file_desc[i1] < 0 && comparison_type == type_status) |
---|
206 | exit (2); |
---|
207 | else |
---|
208 | error (2, errno, "%s", file[i1]); |
---|
209 | } |
---|
210 | #if HAVE_SETMODE |
---|
211 | setmode (file_desc[i1], O_BINARY); |
---|
212 | #endif |
---|
213 | } |
---|
214 | |
---|
215 | /* If the files are links to the same inode and have the same file position, |
---|
216 | they are identical. */ |
---|
217 | |
---|
218 | if (0 < same_file (&stat_buf[0], &stat_buf[1]) |
---|
219 | && file_position (0) == file_position (1)) |
---|
220 | exit (0); |
---|
221 | |
---|
222 | /* If output is redirected to the null device, we may assume `-s'. */ |
---|
223 | |
---|
224 | if (comparison_type != type_status) |
---|
225 | { |
---|
226 | struct stat outstat, nullstat; |
---|
227 | |
---|
228 | if (fstat (STDOUT_FILENO, &outstat) == 0 |
---|
229 | && stat (NULL_DEVICE, &nullstat) == 0 |
---|
230 | && 0 < same_file (&outstat, &nullstat)) |
---|
231 | comparison_type = type_status; |
---|
232 | } |
---|
233 | |
---|
234 | /* If only a return code is needed, |
---|
235 | and if both input descriptors are associated with plain files, |
---|
236 | conclude that the files differ if they have different sizes. */ |
---|
237 | |
---|
238 | if (comparison_type == type_status |
---|
239 | && S_ISREG (stat_buf[0].st_mode) |
---|
240 | && S_ISREG (stat_buf[1].st_mode)) |
---|
241 | { |
---|
242 | off_t s0 = stat_buf[0].st_size - file_position (0); |
---|
243 | off_t s1 = stat_buf[1].st_size - file_position (1); |
---|
244 | |
---|
245 | if (max (0, s0) != max (0, s1)) |
---|
246 | exit (1); |
---|
247 | } |
---|
248 | |
---|
249 | /* Get the optimal block size of the files. */ |
---|
250 | |
---|
251 | buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]), |
---|
252 | STAT_BLOCKSIZE (stat_buf[1])); |
---|
253 | |
---|
254 | /* Allocate buffers, with space for sentinels at the end. */ |
---|
255 | |
---|
256 | for (i = 0; i < 2; i++) |
---|
257 | buffer[i] = xmalloc (buf_size + sizeof (word)); |
---|
258 | |
---|
259 | exit_status = cmp (); |
---|
260 | |
---|
261 | for (i = 0; i < 2; i++) |
---|
262 | if (close (file_desc[i]) != 0) |
---|
263 | error (2, errno, "%s", file[i]); |
---|
264 | if (exit_status != 0 && comparison_type != type_status) |
---|
265 | check_stdout (); |
---|
266 | exit (exit_status); |
---|
267 | return exit_status; |
---|
268 | } |
---|
269 | |
---|
270 | /* Compare the two files already open on `file_desc[0]' and `file_desc[1]', |
---|
271 | using `buffer[0]' and `buffer[1]'. |
---|
272 | Return 0 if identical, 1 if different, >1 if error. */ |
---|
273 | |
---|
274 | static int |
---|
275 | cmp () |
---|
276 | { |
---|
277 | long line_number = 1; /* Line number (1...) of first difference. */ |
---|
278 | long char_number = ignore_initial + 1; |
---|
279 | /* Offset (1...) in files of 1st difference. */ |
---|
280 | size_t read0, read1; /* Number of chars read from each file. */ |
---|
281 | size_t first_diff; /* Offset (0...) in buffers of 1st diff. */ |
---|
282 | size_t smaller; /* The lesser of `read0' and `read1'. */ |
---|
283 | char *buf0 = buffer[0]; |
---|
284 | char *buf1 = buffer[1]; |
---|
285 | int ret = 0; |
---|
286 | int i; |
---|
287 | |
---|
288 | if (ignore_initial) |
---|
289 | for (i = 0; i < 2; i++) |
---|
290 | if (file_position (i) == -1) |
---|
291 | { |
---|
292 | /* lseek failed; read and discard the ignored initial prefix. */ |
---|
293 | off_t ig = ignore_initial; |
---|
294 | do |
---|
295 | { |
---|
296 | size_t r = read (file_desc[i], buf0, (size_t) min (ig, buf_size)); |
---|
297 | if (!r) |
---|
298 | break; |
---|
299 | if (r == -1) |
---|
300 | error (2, errno, "%s", file[i]); |
---|
301 | ig -= r; |
---|
302 | } |
---|
303 | while (ig); |
---|
304 | } |
---|
305 | |
---|
306 | do |
---|
307 | { |
---|
308 | read0 = block_read (file_desc[0], buf0, buf_size); |
---|
309 | if (read0 == -1) |
---|
310 | error (2, errno, "%s", file[0]); |
---|
311 | read1 = block_read (file_desc[1], buf1, buf_size); |
---|
312 | if (read1 == -1) |
---|
313 | error (2, errno, "%s", file[1]); |
---|
314 | |
---|
315 | /* Insert sentinels for the block compare. */ |
---|
316 | |
---|
317 | buf0[read0] = ~buf1[read0]; |
---|
318 | buf1[read1] = ~buf0[read1]; |
---|
319 | |
---|
320 | /* If the line number should be written for differing files, |
---|
321 | compare the blocks and count the number of newlines |
---|
322 | simultaneously. */ |
---|
323 | first_diff = (comparison_type == type_first_diff |
---|
324 | ? block_compare_and_count (buf0, buf1, &line_number) |
---|
325 | : block_compare (buf0, buf1)); |
---|
326 | |
---|
327 | char_number += first_diff; |
---|
328 | smaller = min (read0, read1); |
---|
329 | |
---|
330 | if (first_diff < smaller) |
---|
331 | { |
---|
332 | switch (comparison_type) |
---|
333 | { |
---|
334 | case type_first_diff: |
---|
335 | /* See Posix.2 section 4.10.6.1 for this format. */ |
---|
336 | printf ("%s %s differ: char %lu, line %lu", |
---|
337 | file[0], file[1], char_number, line_number); |
---|
338 | if (opt_print_chars) |
---|
339 | { |
---|
340 | unsigned char c0 = buf0[first_diff]; |
---|
341 | unsigned char c1 = buf1[first_diff]; |
---|
342 | printf (" is %3o ", c0); |
---|
343 | printc (0, c0); |
---|
344 | printf (" %3o ", c1); |
---|
345 | printc (0, c1); |
---|
346 | } |
---|
347 | putchar ('\n'); |
---|
348 | /* Fall through. */ |
---|
349 | case type_status: |
---|
350 | return 1; |
---|
351 | |
---|
352 | case type_all_diffs: |
---|
353 | do |
---|
354 | { |
---|
355 | unsigned char c0 = buf0[first_diff]; |
---|
356 | unsigned char c1 = buf1[first_diff]; |
---|
357 | if (c0 != c1) |
---|
358 | { |
---|
359 | if (opt_print_chars) |
---|
360 | { |
---|
361 | printf ("%6lu %3o ", char_number, c0); |
---|
362 | printc (4, c0); |
---|
363 | printf (" %3o ", c1); |
---|
364 | printc (0, c1); |
---|
365 | putchar ('\n'); |
---|
366 | } |
---|
367 | else |
---|
368 | /* See Posix.2 section 4.10.6.1 for this format. */ |
---|
369 | printf ("%6lu %3o %3o\n", char_number, c0, c1); |
---|
370 | } |
---|
371 | char_number++; |
---|
372 | first_diff++; |
---|
373 | } |
---|
374 | while (first_diff < smaller); |
---|
375 | ret = 1; |
---|
376 | break; |
---|
377 | } |
---|
378 | } |
---|
379 | |
---|
380 | if (read0 != read1) |
---|
381 | { |
---|
382 | if (comparison_type != type_status) |
---|
383 | /* See Posix.2 section 4.10.6.2 for this format. */ |
---|
384 | fprintf (stderr, "cmp: EOF on %s\n", file[read1 < read0]); |
---|
385 | |
---|
386 | return 1; |
---|
387 | } |
---|
388 | } |
---|
389 | while (read0 == buf_size); |
---|
390 | return ret; |
---|
391 | } |
---|
392 | |
---|
393 | /* Compare two blocks of memory P0 and P1 until they differ, |
---|
394 | and count the number of '\n' occurrences in the common |
---|
395 | part of P0 and P1. |
---|
396 | Assumes that P0 and P1 are aligned at word addresses! |
---|
397 | If the blocks are not guaranteed to be different, put sentinels at the ends |
---|
398 | of the blocks before calling this function. |
---|
399 | |
---|
400 | Return the offset of the first byte that differs. |
---|
401 | Increment *COUNT by the count of '\n' occurrences. */ |
---|
402 | |
---|
403 | static size_t |
---|
404 | block_compare_and_count (p0, p1, count) |
---|
405 | char const *p0, *p1; |
---|
406 | long *count; |
---|
407 | { |
---|
408 | word l; /* One word from first buffer. */ |
---|
409 | word const *l0, *l1; /* Pointers into each buffer. */ |
---|
410 | char const *c0, *c1; /* Pointers for finding exact address. */ |
---|
411 | long cnt = 0; /* Number of '\n' occurrences. */ |
---|
412 | word nnnn; /* Newline, sizeof (word) times. */ |
---|
413 | int i; |
---|
414 | |
---|
415 | l0 = (word const *) p0; |
---|
416 | l1 = (word const *) p1; |
---|
417 | |
---|
418 | nnnn = 0; |
---|
419 | for (i = 0; i < sizeof (word); i++) |
---|
420 | nnnn = (nnnn << CHAR_BIT) | '\n'; |
---|
421 | |
---|
422 | /* Find the rough position of the first difference by reading words, |
---|
423 | not bytes. */ |
---|
424 | |
---|
425 | while ((l = *l0++) == *l1++) |
---|
426 | { |
---|
427 | l ^= nnnn; |
---|
428 | for (i = 0; i < sizeof (word); i++) |
---|
429 | { |
---|
430 | cnt += ! (unsigned char) l; |
---|
431 | l >>= CHAR_BIT; |
---|
432 | } |
---|
433 | } |
---|
434 | |
---|
435 | /* Find the exact differing position (endianness independent). */ |
---|
436 | |
---|
437 | c0 = (char const *) (l0 - 1); |
---|
438 | c1 = (char const *) (l1 - 1); |
---|
439 | while (*c0 == *c1) |
---|
440 | { |
---|
441 | cnt += *c0 == '\n'; |
---|
442 | c0++; |
---|
443 | c1++; |
---|
444 | } |
---|
445 | |
---|
446 | *count += cnt; |
---|
447 | return c0 - p0; |
---|
448 | } |
---|
449 | |
---|
450 | /* Compare two blocks of memory P0 and P1 until they differ. |
---|
451 | Assumes that P0 and P1 are aligned at word addresses! |
---|
452 | If the blocks are not guaranteed to be different, put sentinels at the ends |
---|
453 | of the blocks before calling this function. |
---|
454 | |
---|
455 | Return the offset of the first byte that differs. */ |
---|
456 | |
---|
457 | static size_t |
---|
458 | block_compare (p0, p1) |
---|
459 | char const *p0, *p1; |
---|
460 | { |
---|
461 | word const *l0, *l1; |
---|
462 | char const *c0, *c1; |
---|
463 | |
---|
464 | l0 = (word const *) p0; |
---|
465 | l1 = (word const *) p1; |
---|
466 | |
---|
467 | /* Find the rough position of the first difference by reading words, |
---|
468 | not bytes. */ |
---|
469 | |
---|
470 | while (*l0++ == *l1++) |
---|
471 | ; |
---|
472 | |
---|
473 | /* Find the exact differing position (endianness independent). */ |
---|
474 | |
---|
475 | c0 = (char const *) (l0 - 1); |
---|
476 | c1 = (char const *) (l1 - 1); |
---|
477 | while (*c0 == *c1) |
---|
478 | { |
---|
479 | c0++; |
---|
480 | c1++; |
---|
481 | } |
---|
482 | |
---|
483 | return c0 - p0; |
---|
484 | } |
---|
485 | |
---|
/* Read NCHARS bytes from descriptor FD into BUF.
   Return the number of bytes successfully read.
   The number returned is always NCHARS unless end-of-file or error.

   Short reads are resumed until NCHARS bytes have arrived or `read'
   returns zero.  A `read' that fails with EINTR is retried instead of
   being reported, so an interrupted system call does not abort the
   comparison.  On any other failure return (size_t) -1, which callers
   detect by comparing the result with -1; errno is left as `read'
   set it.  */

static size_t
block_read (fd, buf, nchars)
     int fd;
     char *buf;
     size_t nchars;
{
  char *bp = buf;

  do
    {
      /* A failing `read' yields -1, which becomes the all-ones value
         once stored in a size_t.  */
      size_t nread = read (fd, bp, nchars);
      if (nread == (size_t) -1)
        {
#ifdef EINTR
          /* Nothing was transferred; try the same request again.  */
          if (errno == EINTR)
            continue;
#endif
          return (size_t) -1;
        }
      if (nread == 0)
        break;
      bp += nread;
      nchars -= nread;
    }
  while (nchars != 0);

  return bp - buf;
}
---|
512 | |
---|
/* Write character C to stdout, rendering characters for which
   ISPRINT is false in a visible form like cat -t does: values of 128
   and above get an "M-" prefix and are reduced by 128, after which
   values below 32 are shown as '^' plus the value offset by 64, and
   127 is shown as "^?".
   Pad with spaces on the right to WIDTH characters.  */

static void
printc (width, c)
     int width;
     unsigned c;
{
  register FILE *out = stdout;

  if (! ISPRINT (c))
    {
      if (c >= 128)
        {
          fputs ("M-", out);
          c -= 128;
          width -= 2;
        }

      if (c < 32 || c == 127)
        {
          putc ('^', out);
          c = (c == 127) ? '?' : c + 64;
          --width;
        }
    }

  putc (c, out);

  /* The character just written takes one column; fill what is left.  */
  for (width--; width > 0; width--)
    putc (' ', out);
}
---|
551 | |
---|
552 | /* Position file I to `ignore_initial' bytes from its initial position, |
---|
553 | and yield its new position. Don't try more than once. */ |
---|
554 | |
---|
555 | static off_t |
---|
556 | file_position (i) |
---|
557 | int i; |
---|
558 | { |
---|
559 | static int positioned[2]; |
---|
560 | static off_t position[2]; |
---|
561 | |
---|
562 | if (! positioned[i]) |
---|
563 | { |
---|
564 | positioned[i] = 1; |
---|
565 | position[i] = lseek (file_desc[i], ignore_initial, SEEK_CUR); |
---|
566 | } |
---|
567 | return position[i]; |
---|
568 | } |
---|