source: trunk/third/diffutils/cmp.c @ 16149

Revision 16149, 13.7 KB checked in by rbasch, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r16148, which included commits to RCS files with non-trunk default branches.
Line 
1/* cmp -- compare two files.
2   Copyright (C) 1990, 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software
16   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
17
18/* Written by Torbjorn Granlund and David MacKenzie. */
19
20#include "system.h"
21#include <stdio.h>
22#include "getopt.h"
23#include "cmpbuf.h"
24
25extern char const version_string[];
26
27#if __STDC__ && defined (HAVE_VPRINTF)
28void error (int, int, char const *, ...);
29#else
30void error ();
31#endif
32VOID *xmalloc PARAMS((size_t));
33
34static int cmp PARAMS((void));
35static off_t file_position PARAMS((int));
36static size_t block_compare PARAMS((char const *, char const *));
37static size_t block_compare_and_count PARAMS((char const *, char const *, long *));
38static size_t block_read PARAMS((int, char *, size_t));
39static void printc PARAMS((int, unsigned));
40static void try_help PARAMS((char const *));
41static void check_stdout PARAMS((void));
42static void usage PARAMS((void));
43
44/* Name under which this program was invoked.  */
45char const *program_name;
46
47/* Filenames of the compared files.  */
48static char const *file[2];
49
50/* File descriptors of the files.  */
51static int file_desc[2];
52
53/* Read buffers for the files.  */
54static char *buffer[2];
55
56/* Optimal block size for the files.  */
57static size_t buf_size;
58
59/* Initial prefix to ignore for each file.  */
60static off_t ignore_initial;
61
62/* Output format:
63   type_first_diff
64     to print the offset and line number of the first differing bytes
65   type_all_diffs
66     to print the (decimal) offsets and (octal) values of all differing bytes
67   type_status
68     to only return an exit status indicating whether the files differ */
69static enum
70  {
71    type_first_diff, type_all_diffs, type_status
72  } comparison_type;
73
74/* Type used for fast comparison of several bytes at a time.  */
75#ifndef word
76#define word int
77#endif
78
79/* If nonzero, print values of bytes quoted like cat -t does. */
80static int opt_print_chars;
81
82static struct option const long_options[] =
83{
84  {"print-chars", 0, 0, 'c'},
85  {"ignore-initial", 1, 0, 'i'},
86  {"verbose", 0, 0, 'l'},
87  {"silent", 0, 0, 's'},
88  {"quiet", 0, 0, 's'},
89  {"version", 0, 0, 'v'},
90  {"help", 0, 0, 129},
91  {0, 0, 0, 0}
92};
93
94static void
95try_help (reason)
96     char const *reason;
97{
98  if (reason)
99    error (0, 0, "%s", reason);
100  error (2, 0, "Try `%s --help' for more information.", program_name);
101}
102
/* Verify that everything written to stdout got out.
   Report "write error" through `error' with status 2 if the stream
   already has its error flag set, or if closing it fails (in which
   case errno is passed along as well).  */
static void
check_stdout ()
{
  if (ferror (stdout))
    {
      /* The stream is already bad; do not attempt to close it.  */
      error (2, 0, "write error");
      return;
    }

  if (fclose (stdout) != 0)
    error (2, errno, "write error");
}
111
112static void
113usage ()
114{
115  printf ("Usage: %s [OPTION]... FILE1 [FILE2]\n", program_name);
116  printf ("%s", "\
117  -c  --print-chars  Output differing bytes as characters.\n\
118  -i N  --ignore-initial=N  Ignore differences in the first N bytes of input.\n\
119  -l  --verbose  Output offsets and codes of all differing bytes.\n\
120  -s  --quiet  --silent  Output nothing; yield exit status only.\n\
121  -v  --version  Output version info.\n\
122  --help  Output this help.\n");
123  printf ("If a FILE is `-' or missing, read standard input.\n");
124}
125
126int
127main (argc, argv)
128     int argc;
129     char *argv[];
130{
131  int c, i, exit_status;
132  struct stat stat_buf[2];
133
134  initialize_main (&argc, &argv);
135  program_name = argv[0];
136
137  /* Parse command line options.  */
138
139  while ((c = getopt_long (argc, argv, "ci:lsv", long_options, 0))
140         != EOF)
141    switch (c)
142      {
143      case 'c':
144        opt_print_chars = 1;
145        break;
146
147      case 'i':
148        ignore_initial = 0;
149        while (*optarg)
150          {
151            /* Don't use `atol', because `off_t' may be longer than `long'.  */
152            unsigned digit = *optarg++ - '0';
153            if (9 < digit)
154              try_help ("non-digit in --ignore-initial value");
155            ignore_initial = 10 * ignore_initial + digit;
156          }
157        break;
158
159      case 'l':
160        comparison_type = type_all_diffs;
161        break;
162
163      case 's':
164        comparison_type = type_status;
165        break;
166
167      case 'v':
168        printf ("cmp - GNU diffutils version %s\n", version_string);
169        exit (0);
170
171      case 129:
172        usage ();
173        check_stdout ();
174        exit (0);
175
176      default:
177        try_help (0);
178      }
179
180  if (optind == argc)
181    try_help ("missing operand");
182
183  file[0] = argv[optind++];
184  file[1] = optind < argc ? argv[optind++] : "-";
185
186  if (optind < argc)
187    try_help ("extra operands");
188
189  for (i = 0; i < 2; i++)
190    {
191      /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
192         stdin is closed and opening file[0] yields file descriptor 0.  */
193      int i1 = i ^ (strcmp (file[1], "-") == 0);
194
195      /* Two files with the same name are identical.
196         But wait until we open the file once, for proper diagnostics.  */
197      if (i && filename_cmp (file[0], file[1]) == 0)
198        exit (0);
199
200      file_desc[i1] = (strcmp (file[i1], "-") == 0
201                       ? STDIN_FILENO
202                       : open (file[i1], O_RDONLY, 0));
203      if (file_desc[i1] < 0 || fstat (file_desc[i1], &stat_buf[i1]) != 0)
204        {
205          if (file_desc[i1] < 0 && comparison_type == type_status)
206            exit (2);
207          else
208            error (2, errno, "%s", file[i1]);
209        }
210#if HAVE_SETMODE
211      setmode (file_desc[i1], O_BINARY);
212#endif
213    }
214
215  /* If the files are links to the same inode and have the same file position,
216     they are identical.  */
217
218  if (0 < same_file (&stat_buf[0], &stat_buf[1])
219      && file_position (0) == file_position (1))
220    exit (0);
221
222  /* If output is redirected to the null device, we may assume `-s'.  */
223
224  if (comparison_type != type_status)
225    {
226      struct stat outstat, nullstat;
227
228      if (fstat (STDOUT_FILENO, &outstat) == 0
229          && stat (NULL_DEVICE, &nullstat) == 0
230          && 0 < same_file (&outstat, &nullstat))
231        comparison_type = type_status;
232    }
233
234  /* If only a return code is needed,
235     and if both input descriptors are associated with plain files,
236     conclude that the files differ if they have different sizes.  */
237
238  if (comparison_type == type_status
239      && S_ISREG (stat_buf[0].st_mode)
240      && S_ISREG (stat_buf[1].st_mode))
241    {
242      off_t s0 = stat_buf[0].st_size - file_position (0);
243      off_t s1 = stat_buf[1].st_size - file_position (1);
244
245      if (max (0, s0) != max (0, s1))
246        exit (1);
247    }
248
249  /* Get the optimal block size of the files.  */
250
251  buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
252                         STAT_BLOCKSIZE (stat_buf[1]));
253
254  /* Allocate buffers, with space for sentinels at the end.  */
255
256  for (i = 0; i < 2; i++)
257    buffer[i] = xmalloc (buf_size + sizeof (word));
258
259  exit_status = cmp ();
260
261  for (i = 0; i < 2; i++)
262    if (close (file_desc[i]) != 0)
263      error (2, errno, "%s", file[i]);
264  if (exit_status != 0  &&  comparison_type != type_status)
265    check_stdout ();
266  exit (exit_status);
267  return exit_status;
268}
269
270/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
271   using `buffer[0]' and `buffer[1]'.
272   Return 0 if identical, 1 if different, >1 if error. */
273
274static int
275cmp ()
276{
277  long line_number = 1;         /* Line number (1...) of first difference. */
278  long char_number = ignore_initial + 1;
279                                /* Offset (1...) in files of 1st difference. */
280  size_t read0, read1;          /* Number of chars read from each file. */
281  size_t first_diff;            /* Offset (0...) in buffers of 1st diff. */
282  size_t smaller;               /* The lesser of `read0' and `read1'. */
283  char *buf0 = buffer[0];
284  char *buf1 = buffer[1];
285  int ret = 0;
286  int i;
287
288  if (ignore_initial)
289    for (i = 0; i < 2; i++)
290      if (file_position (i) == -1)
291        {
292          /* lseek failed; read and discard the ignored initial prefix.  */
293          off_t ig = ignore_initial;
294          do
295            {
296              size_t r = read (file_desc[i], buf0, (size_t) min (ig, buf_size));
297              if (!r)
298                break;
299              if (r == -1)
300                error (2, errno, "%s", file[i]);
301              ig -= r;
302            }
303          while (ig);
304        }
305
306  do
307    {
308      read0 = block_read (file_desc[0], buf0, buf_size);
309      if (read0 == -1)
310        error (2, errno, "%s", file[0]);
311      read1 = block_read (file_desc[1], buf1, buf_size);
312      if (read1 == -1)
313        error (2, errno, "%s", file[1]);
314
315      /* Insert sentinels for the block compare.  */
316
317      buf0[read0] = ~buf1[read0];
318      buf1[read1] = ~buf0[read1];
319
320      /* If the line number should be written for differing files,
321         compare the blocks and count the number of newlines
322         simultaneously.  */
323      first_diff = (comparison_type == type_first_diff
324                    ? block_compare_and_count (buf0, buf1, &line_number)
325                    : block_compare (buf0, buf1));
326
327      char_number += first_diff;
328      smaller = min (read0, read1);
329
330      if (first_diff < smaller)
331        {
332          switch (comparison_type)
333            {
334            case type_first_diff:
335              /* See Posix.2 section 4.10.6.1 for this format.  */
336              printf ("%s %s differ: char %lu, line %lu",
337                      file[0], file[1], char_number, line_number);
338              if (opt_print_chars)
339                {
340                  unsigned char c0 = buf0[first_diff];
341                  unsigned char c1 = buf1[first_diff];
342                  printf (" is %3o ", c0);
343                  printc (0, c0);
344                  printf (" %3o ", c1);
345                  printc (0, c1);
346                }
347              putchar ('\n');
348              /* Fall through. */
349            case type_status:
350              return 1;
351
352            case type_all_diffs:
353              do
354                {
355                  unsigned char c0 = buf0[first_diff];
356                  unsigned char c1 = buf1[first_diff];
357                  if (c0 != c1)
358                    {
359                      if (opt_print_chars)
360                        {
361                          printf ("%6lu %3o ", char_number, c0);
362                          printc (4, c0);
363                          printf (" %3o ", c1);
364                          printc (0, c1);
365                          putchar ('\n');
366                        }
367                      else
368                        /* See Posix.2 section 4.10.6.1 for this format.  */
369                        printf ("%6lu %3o %3o\n", char_number, c0, c1);
370                    }
371                  char_number++;
372                  first_diff++;
373                }
374              while (first_diff < smaller);
375              ret = 1;
376              break;
377            }
378        }
379
380      if (read0 != read1)
381        {
382          if (comparison_type != type_status)
383            /* See Posix.2 section 4.10.6.2 for this format.  */
384            fprintf (stderr, "cmp: EOF on %s\n", file[read1 < read0]);
385
386          return 1;
387        }
388    }
389  while (read0 == buf_size);
390  return ret;
391}
392
393/* Compare two blocks of memory P0 and P1 until they differ,
394   and count the number of '\n' occurrences in the common
395   part of P0 and P1.
396   Assumes that P0 and P1 are aligned at word addresses!
397   If the blocks are not guaranteed to be different, put sentinels at the ends
398   of the blocks before calling this function.
399
400   Return the offset of the first byte that differs.
401   Increment *COUNT by the count of '\n' occurrences.  */
402
403static size_t
404block_compare_and_count (p0, p1, count)
405     char const *p0, *p1;
406     long *count;
407{
408  word l;               /* One word from first buffer. */
409  word const *l0, *l1;  /* Pointers into each buffer. */
410  char const *c0, *c1;  /* Pointers for finding exact address. */
411  long cnt = 0;         /* Number of '\n' occurrences. */
412  word nnnn;            /* Newline, sizeof (word) times.  */
413  int i;
414
415  l0 = (word const *) p0;
416  l1 = (word const *) p1;
417
418  nnnn = 0;
419  for (i = 0; i < sizeof (word); i++)
420    nnnn = (nnnn << CHAR_BIT) | '\n';
421
422  /* Find the rough position of the first difference by reading words,
423     not bytes.  */
424
425  while ((l = *l0++) == *l1++)
426    {
427      l ^= nnnn;
428      for (i = 0; i < sizeof (word); i++)
429        {
430          cnt += ! (unsigned char) l;
431          l >>= CHAR_BIT;
432        }
433    }
434
435  /* Find the exact differing position (endianness independent).  */
436
437  c0 = (char const *) (l0 - 1);
438  c1 = (char const *) (l1 - 1);
439  while (*c0 == *c1)
440    {
441      cnt += *c0 == '\n';
442      c0++;
443      c1++;
444    }
445
446  *count += cnt;
447  return c0 - p0;
448}
449
450/* Compare two blocks of memory P0 and P1 until they differ.
451   Assumes that P0 and P1 are aligned at word addresses!
452   If the blocks are not guaranteed to be different, put sentinels at the ends
453   of the blocks before calling this function.
454
455   Return the offset of the first byte that differs.  */
456
457static size_t
458block_compare (p0, p1)
459     char const *p0, *p1;
460{
461  word const *l0, *l1;
462  char const *c0, *c1;
463
464  l0 = (word const *) p0;
465  l1 = (word const *) p1;
466
467  /* Find the rough position of the first difference by reading words,
468     not bytes.  */
469
470  while (*l0++ == *l1++)
471    ;
472
473  /* Find the exact differing position (endianness independent).  */
474
475  c0 = (char const *) (l0 - 1);
476  c1 = (char const *) (l1 - 1);
477  while (*c0 == *c1)
478    {
479      c0++;
480      c1++;
481    }
482
483  return c0 - p0;
484}
485
/* Read NCHARS bytes from descriptor FD into BUF, calling `read' again
   after short reads until NCHARS bytes have arrived or end-of-file is
   reached.  A read that fails with EINTR is retried rather than treated
   as an error.

   Return the number of bytes stored in BUF; this is less than NCHARS
   only at end-of-file.  On a read error return (size_t) -1, with errno
   as set by `read'.  */

static size_t
block_read (fd, buf, nchars)
     int fd;
     char *buf;
     size_t nchars;
{
  char *bp = buf;

  do
    {
      /* read's -1 is deliberately held in a size_t; test it as such.  */
      size_t nread = read (fd, bp, nchars);
      if (nread == (size_t) -1)
        {
#ifdef EINTR
          /* Interrupted before any data was transferred: try again.  */
          if (errno == EINTR)
            continue;
#endif
          return (size_t) -1;
        }
      if (nread == 0)
        break;
      bp += nread;
      nchars -= nread;
    }
  while (nchars != 0);

  return bp - buf;
}
512
/* Write character C to stdout.  A character that ISPRINT rejects is
   made visible the way cat -t does it: `M-' in front of values of 128
   and above, and `^' notation for control characters and DEL.
   Pad with spaces on the right to WIDTH characters.  */

static void
printc (width, c)
     int width;
     unsigned c;
{
  register FILE *out = stdout;

  if (! ISPRINT (c))
    {
      if (128 <= c)
        {
          /* High bit set: flag it and continue with the low seven bits.  */
          putc ('M', out);
          putc ('-', out);
          c -= 128;
          width -= 2;
        }
      if (c < 32 || c == 127)
        {
          /* Control characters map to `^@'...`^_'; DEL becomes `^?'.  */
          putc ('^', out);
          c = c < 32 ? c + 64 : '?';
          --width;
        }
    }

  putc (c, out);

  /* The character itself used one column; fill whatever remains.  */
  for (--width; width > 0; --width)
    putc (' ', out);
}
551
552/* Position file I to `ignore_initial' bytes from its initial position,
553   and yield its new position.  Don't try more than once.  */
554
555static off_t
556file_position (i)
557     int i;
558{
559  static int positioned[2];
560  static off_t position[2];
561
562  if (! positioned[i])
563    {
564      positioned[i] = 1;
565      position[i] = lseek (file_desc[i], ignore_initial, SEEK_CUR);
566    }
567  return position[i];
568}
Note: See TracBrowser for help on using the repository browser.