source: trunk/third/diffutils/analyze.c @ 16149

Revision 16149, 30.6 KB checked in by rbasch, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r16148, which included commits to RCS files with non-trunk default branches.
Line 
1/* Analyze file differences for GNU DIFF.
2   Copyright (C) 1988, 1989, 1992, 1993 Free Software Foundation, Inc.
3
4This file is part of GNU DIFF.
5
6GNU DIFF is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU DIFF is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU DIFF; see the file COPYING.  If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19
20/* The basic algorithm is described in:
21   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
22   Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
23   see especially section 4.2, which describes the variation used below.
24   Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
25   heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
26   at the price of producing suboptimal output for large inputs with
27   many differences.
28
29   The basic algorithm was independently discovered as described in:
30   "Algorithms for Approximate String Matching", E. Ukkonen,
31   Information and Control Vol. 64, 1985, pp. 100-118.  */
32
33#include "diff.h"
34#include "cmpbuf.h"
35
36extern int no_discards;
37
38static int *xvec, *yvec;        /* Vectors being compared. */
39static int *fdiag;              /* Vector, indexed by diagonal, containing
40                                   1 + the X coordinate of the point furthest
41                                   along the given diagonal in the forward
42                                   search of the edit matrix. */
43static int *bdiag;              /* Vector, indexed by diagonal, containing
44                                   the X coordinate of the point furthest
45                                   along the given diagonal in the backward
46                                   search of the edit matrix. */
47static int too_expensive;       /* Edit scripts longer than this are too
48                                   expensive to compute.  */
49
50#define SNAKE_LIMIT 20  /* Snakes bigger than this are considered `big'.  */
51
52struct partition
53{
54  int xmid, ymid;       /* Midpoints of this partition.  */
55  int lo_minimal;       /* Nonzero if low half will be analyzed minimally.  */
56  int hi_minimal;       /* Likewise for high half.  */
57};
58
59static int diag PARAMS((int, int, int, int, int, struct partition *));
60static struct change *add_change PARAMS((int, int, int, int, struct change *));
61static struct change *build_reverse_script PARAMS((struct file_data const[]));
62static struct change *build_script PARAMS((struct file_data const[]));
63static void briefly_report PARAMS((int, struct file_data const[]));
64static void compareseq PARAMS((int, int, int, int, int));
65static void discard_confusing_lines PARAMS((struct file_data[]));
66static void shift_boundaries PARAMS((struct file_data[]));
67
68/* Find the midpoint of the shortest edit script for a specified
69   portion of the two files.
70
71   Scan from the beginnings of the files, and simultaneously from the ends,
72   doing a breadth-first search through the space of edit-sequence.
73   When the two searches meet, we have found the midpoint of the shortest
74   edit sequence.
75
76   If MINIMAL is nonzero, find the minimal edit script regardless
77   of expense.  Otherwise, if the search is too expensive, use
78   heuristics to stop the search and report a suboptimal answer.
79
80   Set PART->(XMID,YMID) to the midpoint (XMID,YMID).  The diagonal number
81   XMID - YMID equals the number of inserted lines minus the number
82   of deleted lines (counting only lines before the midpoint).
83   Return the approximate edit cost; this is the total number of
84   lines inserted or deleted (counting only lines before the midpoint),
85   unless a heuristic is used to terminate the search prematurely.
86
87   Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script for the
88   left half of the partition is known; similarly for PART->RIGHT_MINIMAL.
89
90   This function assumes that the first lines of the specified portions
91   of the two files do not match, and likewise that the last lines do not
92   match.  The caller must trim matching lines from the beginning and end
93   of the portions it is going to specify.
94
95   If we return the "wrong" partitions,
96   the worst this can do is cause suboptimal diff output.
97   It cannot cause incorrect diff output.  */
98
99static int
100diag (xoff, xlim, yoff, ylim, minimal, part)
101     int xoff, xlim, yoff, ylim, minimal;
102     struct partition *part;
103{
104  int *const fd = fdiag;        /* Give the compiler a chance. */
105  int *const bd = bdiag;        /* Additional help for the compiler. */
106  int const *const xv = xvec;   /* Still more help for the compiler. */
107  int const *const yv = yvec;   /* And more and more . . . */
108  int const dmin = xoff - ylim; /* Minimum valid diagonal. */
109  int const dmax = xlim - yoff; /* Maximum valid diagonal. */
110  int const fmid = xoff - yoff; /* Center diagonal of top-down search. */
111  int const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
112  int fmin = fmid, fmax = fmid; /* Limits of top-down search. */
113  int bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */
114  int c;                        /* Cost. */
115  int odd = (fmid - bmid) & 1;  /* True if southeast corner is on an odd
116                                   diagonal with respect to the northwest. */
117
118  fd[fmid] = xoff;
119  bd[bmid] = xlim;
120
121  for (c = 1;; ++c)
122    {
123      int d;                    /* Active diagonal. */
124      int big_snake = 0;
125
126      /* Extend the top-down search by an edit step in each diagonal. */
127      fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
128      fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
129      for (d = fmax; d >= fmin; d -= 2)
130        {
131          int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
132
133          if (tlo >= thi)
134            x = tlo + 1;
135          else
136            x = thi;
137          oldx = x;
138          y = x - d;
139          while (x < xlim && y < ylim && xv[x] == yv[y])
140            ++x, ++y;
141          if (x - oldx > SNAKE_LIMIT)
142            big_snake = 1;
143          fd[d] = x;
144          if (odd && bmin <= d && d <= bmax && bd[d] <= x)
145            {
146              part->xmid = x;
147              part->ymid = y;
148              part->lo_minimal = part->hi_minimal = 1;
149              return 2 * c - 1;
150            }
151        }
152
153      /* Similarly extend the bottom-up search.  */
154      bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin;
155      bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax;
156      for (d = bmax; d >= bmin; d -= 2)
157        {
158          int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
159
160          if (tlo < thi)
161            x = tlo;
162          else
163            x = thi - 1;
164          oldx = x;
165          y = x - d;
166          while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
167            --x, --y;
168          if (oldx - x > SNAKE_LIMIT)
169            big_snake = 1;
170          bd[d] = x;
171          if (!odd && fmin <= d && d <= fmax && x <= fd[d])
172            {
173              part->xmid = x;
174              part->ymid = y;
175              part->lo_minimal = part->hi_minimal = 1;
176              return 2 * c;
177            }
178        }
179
180      if (minimal)
181        continue;
182
183      /* Heuristic: check occasionally for a diagonal that has made
184         lots of progress compared with the edit distance.
185         If we have any such, find the one that has made the most
186         progress and return it as if it had succeeded.
187
188         With this heuristic, for files with a constant small density
189         of changes, the algorithm is linear in the file size.  */
190
191      if (c > 200 && big_snake && heuristic)
192        {
193          int best;
194
195          best = 0;
196          for (d = fmax; d >= fmin; d -= 2)
197            {
198              int dd = d - fmid;
199              int x = fd[d];
200              int y = x - d;
201              int v = (x - xoff) * 2 - dd;
202              if (v > 12 * (c + (dd < 0 ? -dd : dd)))
203                {
204                  if (v > best
205                      && xoff + SNAKE_LIMIT <= x && x < xlim
206                      && yoff + SNAKE_LIMIT <= y && y < ylim)
207                    {
208                      /* We have a good enough best diagonal;
209                         now insist that it end with a significant snake.  */
210                      int k;
211
212                      for (k = 1; xv[x - k] == yv[y - k]; k++)
213                        if (k == SNAKE_LIMIT)
214                          {
215                            best = v;
216                            part->xmid = x;
217                            part->ymid = y;
218                            break;
219                          }
220                    }
221                }
222            }
223          if (best > 0)
224            {
225              part->lo_minimal = 1;
226              part->hi_minimal = 0;
227              return 2 * c - 1;
228            }
229
230          best = 0;
231          for (d = bmax; d >= bmin; d -= 2)
232            {
233              int dd = d - bmid;
234              int x = bd[d];
235              int y = x - d;
236              int v = (xlim - x) * 2 + dd;
237              if (v > 12 * (c + (dd < 0 ? -dd : dd)))
238                {
239                  if (v > best
240                      && xoff < x && x <= xlim - SNAKE_LIMIT
241                      && yoff < y && y <= ylim - SNAKE_LIMIT)
242                    {
243                      /* We have a good enough best diagonal;
244                         now insist that it end with a significant snake.  */
245                      int k;
246
247                      for (k = 0; xv[x + k] == yv[y + k]; k++)
248                        if (k == SNAKE_LIMIT - 1)
249                          {
250                            best = v;
251                            part->xmid = x;
252                            part->ymid = y;
253                            break;
254                          }
255                    }
256                }
257            }
258          if (best > 0)
259            {
260              part->lo_minimal = 0;
261              part->hi_minimal = 1;
262              return 2 * c - 1;
263            }
264        }
265
266      /* Heuristic: if we've gone well beyond the call of duty,
267         give up and report halfway between our best results so far.  */
268      if (c >= too_expensive)
269        {
270          int fxybest, fxbest;
271          int bxybest, bxbest;
272
273          fxbest = bxbest = 0;  /* Pacify `gcc -Wall'.  */
274
275          /* Find forward diagonal that maximizes X + Y.  */
276          fxybest = -1;
277          for (d = fmax; d >= fmin; d -= 2)
278            {
279              int x = min (fd[d], xlim);
280              int y = x - d;
281              if (ylim < y)
282                x = ylim + d, y = ylim;
283              if (fxybest < x + y)
284                {
285                  fxybest = x + y;
286                  fxbest = x;
287                }
288            }
289
290          /* Find backward diagonal that minimizes X + Y.  */
291          bxybest = INT_MAX;
292          for (d = bmax; d >= bmin; d -= 2)
293            {
294              int x = max (xoff, bd[d]);
295              int y = x - d;
296              if (y < yoff)
297                x = yoff + d, y = yoff;
298              if (x + y < bxybest)
299                {
300                  bxybest = x + y;
301                  bxbest = x;
302                }
303            }
304
305          /* Use the better of the two diagonals.  */
306          if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
307            {
308              part->xmid = fxbest;
309              part->ymid = fxybest - fxbest;
310              part->lo_minimal = 1;
311              part->hi_minimal = 0;
312            }
313          else
314            {
315              part->xmid = bxbest;
316              part->ymid = bxybest - bxbest;
317              part->lo_minimal = 0;
318              part->hi_minimal = 1;
319            }
320          return 2 * c - 1;
321        }
322    }
323}
324
325/* Compare in detail contiguous subsequences of the two files
326   which are known, as a whole, to match each other.
327
328   The results are recorded in the vectors files[N].changed_flag, by
329   storing a 1 in the element for each line that is an insertion or deletion.
330
331   The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
332
333   Note that XLIM, YLIM are exclusive bounds.
334   All line numbers are origin-0 and discarded lines are not counted.
335 
336   If MINIMAL is nonzero, find a minimal difference no matter how
337   expensive it is.  */
338
339static void
340compareseq (xoff, xlim, yoff, ylim, minimal)
341     int xoff, xlim, yoff, ylim, minimal;
342{
343  int * const xv = xvec; /* Help the compiler.  */
344  int * const yv = yvec;
345
346  /* Slide down the bottom initial diagonal. */
347  while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
348    ++xoff, ++yoff;
349  /* Slide up the top initial diagonal. */
350  while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
351    --xlim, --ylim;
352
353  /* Handle simple cases. */
354  if (xoff == xlim)
355    while (yoff < ylim)
356      files[1].changed_flag[files[1].realindexes[yoff++]] = 1;
357  else if (yoff == ylim)
358    while (xoff < xlim)
359      files[0].changed_flag[files[0].realindexes[xoff++]] = 1;
360  else
361    {
362      int c;
363      struct partition part;
364
365      /* Find a point of correspondence in the middle of the files.  */
366
367      c = diag (xoff, xlim, yoff, ylim, minimal, &part);
368
369      if (c == 1)
370        {
371          /* This should be impossible, because it implies that
372             one of the two subsequences is empty,
373             and that case was handled above without calling `diag'.
374             Let's verify that this is true.  */
375          abort ();
376#if 0
377          /* The two subsequences differ by a single insert or delete;
378             record it and we are done.  */
379          if (part.xmid - part.ymid < xoff - yoff)
380            files[1].changed_flag[files[1].realindexes[part.ymid - 1]] = 1;
381          else
382            files[0].changed_flag[files[0].realindexes[part.xmid]] = 1;
383#endif
384        }
385      else
386        {
387          /* Use the partitions to split this problem into subproblems.  */
388          compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
389          compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
390        }
391    }
392}
393
394/* Discard lines from one file that have no matches in the other file.
395
396   A line which is discarded will not be considered by the actual
397   comparison algorithm; it will be as if that line were not in the file.
398   The file's `realindexes' table maps virtual line numbers
399   (which don't count the discarded lines) into real line numbers;
400   this is how the actual comparison algorithm produces results
401   that are comprehensible when the discarded lines are counted.
402
403   When we discard a line, we also mark it as a deletion or insertion
404   so that it will be printed in the output.  */
405
406static void
407discard_confusing_lines (filevec)
408     struct file_data filevec[];
409{
410  unsigned int f, i;
411  char *discarded[2];
412  int *equiv_count[2];
413  int *p;
414
415  /* Allocate our results.  */
416  p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
417                       * (2 * sizeof (int)));
418  for (f = 0; f < 2; f++)
419    {
420      filevec[f].undiscarded = p;  p += filevec[f].buffered_lines;
421      filevec[f].realindexes = p;  p += filevec[f].buffered_lines;
422    }
423
424  /* Set up equiv_count[F][I] as the number of lines in file F
425     that fall in equivalence class I.  */
426
427  p = (int *) xmalloc (filevec[0].equiv_max * (2 * sizeof (int)));
428  equiv_count[0] = p;
429  equiv_count[1] = p + filevec[0].equiv_max;
430  bzero (p, filevec[0].equiv_max * (2 * sizeof (int)));
431
432  for (i = 0; i < filevec[0].buffered_lines; ++i)
433    ++equiv_count[0][filevec[0].equivs[i]];
434  for (i = 0; i < filevec[1].buffered_lines; ++i)
435    ++equiv_count[1][filevec[1].equivs[i]];
436
437  /* Set up tables of which lines are going to be discarded.  */
438
439  discarded[0] = xmalloc (sizeof (char)
440                          * (filevec[0].buffered_lines
441                             + filevec[1].buffered_lines));
442  discarded[1] = discarded[0] + filevec[0].buffered_lines;
443  bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines
444                                        + filevec[1].buffered_lines));
445
446  /* Mark to be discarded each line that matches no line of the other file.
447     If a line matches many lines, mark it as provisionally discardable.  */
448
449  for (f = 0; f < 2; f++)
450    {
451      unsigned int end = filevec[f].buffered_lines;
452      char *discards = discarded[f];
453      int *counts = equiv_count[1 - f];
454      int *equivs = filevec[f].equivs;
455      unsigned int many = 5;
456      unsigned int tem = end / 64;
457
458      /* Multiply MANY by approximate square root of number of lines.
459         That is the threshold for provisionally discardable lines.  */
460      while ((tem = tem >> 2) > 0)
461        many *= 2;
462
463      for (i = 0; i < end; i++)
464        {
465          int nmatch;
466          if (equivs[i] == 0)
467            continue;
468          nmatch = counts[equivs[i]];
469          if (nmatch == 0)
470            discards[i] = 1;
471          else if (nmatch > many)
472            discards[i] = 2;
473        }
474    }
475
476  /* Don't really discard the provisional lines except when they occur
477     in a run of discardables, with nonprovisionals at the beginning
478     and end.  */
479
480  for (f = 0; f < 2; f++)
481    {
482      unsigned int end = filevec[f].buffered_lines;
483      register char *discards = discarded[f];
484
485      for (i = 0; i < end; i++)
486        {
487          /* Cancel provisional discards not in middle of run of discards.  */
488          if (discards[i] == 2)
489            discards[i] = 0;
490          else if (discards[i] != 0)
491            {
492              /* We have found a nonprovisional discard.  */
493              register int j;
494              unsigned int length;
495              unsigned int provisional = 0;
496
497              /* Find end of this run of discardable lines.
498                 Count how many are provisionally discardable.  */
499              for (j = i; j < end; j++)
500                {
501                  if (discards[j] == 0)
502                    break;
503                  if (discards[j] == 2)
504                    ++provisional;
505                }
506
507              /* Cancel provisional discards at end, and shrink the run.  */
508              while (j > i && discards[j - 1] == 2)
509                discards[--j] = 0, --provisional;
510
511              /* Now we have the length of a run of discardable lines
512                 whose first and last are not provisional.  */
513              length = j - i;
514
515              /* If 1/4 of the lines in the run are provisional,
516                 cancel discarding of all provisional lines in the run.  */
517              if (provisional * 4 > length)
518                {
519                  while (j > i)
520                    if (discards[--j] == 2)
521                      discards[j] = 0;
522                }
523              else
524                {
525                  register unsigned int consec;
526                  unsigned int minimum = 1;
527                  unsigned int tem = length / 4;
528
529                  /* MINIMUM is approximate square root of LENGTH/4.
530                     A subrun of two or more provisionals can stand
531                     when LENGTH is at least 16.
532                     A subrun of 4 or more can stand when LENGTH >= 64.  */
533                  while ((tem = tem >> 2) > 0)
534                    minimum *= 2;
535                  minimum++;
536
537                  /* Cancel any subrun of MINIMUM or more provisionals
538                     within the larger run.  */
539                  for (j = 0, consec = 0; j < length; j++)
540                    if (discards[i + j] != 2)
541                      consec = 0;
542                    else if (minimum == ++consec)
543                      /* Back up to start of subrun, to cancel it all.  */
544                      j -= consec;
545                    else if (minimum < consec)
546                      discards[i + j] = 0;
547
548                  /* Scan from beginning of run
549                     until we find 3 or more nonprovisionals in a row
550                     or until the first nonprovisional at least 8 lines in.
551                     Until that point, cancel any provisionals.  */
552                  for (j = 0, consec = 0; j < length; j++)
553                    {
554                      if (j >= 8 && discards[i + j] == 1)
555                        break;
556                      if (discards[i + j] == 2)
557                        consec = 0, discards[i + j] = 0;
558                      else if (discards[i + j] == 0)
559                        consec = 0;
560                      else
561                        consec++;
562                      if (consec == 3)
563                        break;
564                    }
565
566                  /* I advances to the last line of the run.  */
567                  i += length - 1;
568
569                  /* Same thing, from end.  */
570                  for (j = 0, consec = 0; j < length; j++)
571                    {
572                      if (j >= 8 && discards[i - j] == 1)
573                        break;
574                      if (discards[i - j] == 2)
575                        consec = 0, discards[i - j] = 0;
576                      else if (discards[i - j] == 0)
577                        consec = 0;
578                      else
579                        consec++;
580                      if (consec == 3)
581                        break;
582                    }
583                }
584            }
585        }
586    }
587
588  /* Actually discard the lines. */
589  for (f = 0; f < 2; f++)
590    {
591      char *discards = discarded[f];
592      unsigned int end = filevec[f].buffered_lines;
593      unsigned int j = 0;
594      for (i = 0; i < end; ++i)
595        if (no_discards || discards[i] == 0)
596          {
597            filevec[f].undiscarded[j] = filevec[f].equivs[i];
598            filevec[f].realindexes[j++] = i;
599          }
600        else
601          filevec[f].changed_flag[i] = 1;
602      filevec[f].nondiscarded_lines = j;
603    }
604
605  free (discarded[0]);
606  free (equiv_count[0]);
607}
608
609/* Adjust inserts/deletes of identical lines to join changes
610   as much as possible.
611
612   We do something when a run of changed lines include a
613   line at one end and have an excluded, identical line at the other.
614   We are free to choose which identical line is included.
615   `compareseq' usually chooses the one at the beginning,
616   but usually it is cleaner to consider the following identical line
617   to be the "change".  */
618
619int inhibit;
620
621static void
622shift_boundaries (filevec)
623     struct file_data filevec[];
624{
625  int f;
626
627  if (inhibit)
628    return;
629
630  for (f = 0; f < 2; f++)
631    {
632      char *changed = filevec[f].changed_flag;
633      char const *other_changed = filevec[1-f].changed_flag;
634      int const *equivs = filevec[f].equivs;
635      int i = 0;
636      int j = 0;
637      int i_end = filevec[f].buffered_lines;
638
639      while (1)
640        {
641          int runlength, start, corresponding;
642
643          /* Scan forwards to find beginning of another run of changes.
644             Also keep track of the corresponding point in the other file.  */
645
646          while (i < i_end && changed[i] == 0)
647            {
648              while (other_changed[j++])
649                continue;
650              i++;
651            }
652
653          if (i == i_end)
654            break;
655
656          start = i;
657
658          /* Find the end of this run of changes.  */
659
660          while (changed[++i])
661            continue;
662          while (other_changed[j])
663            j++;
664
665          do
666            {
667              /* Record the length of this run of changes, so that
668                 we can later determine whether the run has grown.  */
669              runlength = i - start;
670
671              /* Move the changed region back, so long as the
672                 previous unchanged line matches the last changed one.
673                 This merges with previous changed regions.  */
674
675              while (start && equivs[start - 1] == equivs[i - 1])
676                {
677                  changed[--start] = 1;
678                  changed[--i] = 0;
679                  while (changed[start - 1])
680                    start--;
681                  while (other_changed[--j])
682                    continue;
683                }
684
685              /* Set CORRESPONDING to the end of the changed run, at the last
686                 point where it corresponds to a changed run in the other file.
687                 CORRESPONDING == I_END means no such point has been found.  */
688              corresponding = other_changed[j - 1] ? i : i_end;
689
690              /* Move the changed region forward, so long as the
691                 first changed line matches the following unchanged one.
692                 This merges with following changed regions.
693                 Do this second, so that if there are no merges,
694                 the changed region is moved forward as far as possible.  */
695
696              while (i != i_end && equivs[start] == equivs[i])
697                {
698                  changed[start++] = 0;
699                  changed[i++] = 1;
700                  while (changed[i])
701                    i++;
702                  while (other_changed[++j])
703                    corresponding = i;
704                }
705            }
706          while (runlength != i - start);
707
708          /* If possible, move the fully-merged run of changes
709             back to a corresponding run in the other file.  */
710
711          while (corresponding < i)
712            {
713              changed[--start] = 1;
714              changed[--i] = 0;
715              while (other_changed[--j])
716                continue;
717            }
718        }
719    }
720}
721
722/* Cons an additional entry onto the front of an edit script OLD.
723   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
724   DELETED is the number of lines deleted here from file 0.
725   INSERTED is the number of lines inserted here in file 1.
726
727   If DELETED is 0 then LINE0 is the number of the line before
728   which the insertion was done; vice versa for INSERTED and LINE1.  */
729
730static struct change *
731add_change (line0, line1, deleted, inserted, old)
732     int line0, line1, deleted, inserted;
733     struct change *old;
734{
735  struct change *new = (struct change *) xmalloc (sizeof (struct change));
736
737  new->line0 = line0;
738  new->line1 = line1;
739  new->inserted = inserted;
740  new->deleted = deleted;
741  new->link = old;
742  return new;
743}
744
745/* Scan the tables of which lines are inserted and deleted,
746   producing an edit script in reverse order.  */
747
748static struct change *
749build_reverse_script (filevec)
750     struct file_data const filevec[];
751{
752  struct change *script = 0;
753  char *changed0 = filevec[0].changed_flag;
754  char *changed1 = filevec[1].changed_flag;
755  int len0 = filevec[0].buffered_lines;
756  int len1 = filevec[1].buffered_lines;
757
758  /* Note that changedN[len0] does exist, and contains 0.  */
759
760  int i0 = 0, i1 = 0;
761
762  while (i0 < len0 || i1 < len1)
763    {
764      if (changed0[i0] || changed1[i1])
765        {
766          int line0 = i0, line1 = i1;
767
768          /* Find # lines changed here in each file.  */
769          while (changed0[i0]) ++i0;
770          while (changed1[i1]) ++i1;
771
772          /* Record this change.  */
773          script = add_change (line0, line1, i0 - line0, i1 - line1, script);
774        }
775
776      /* We have reached lines in the two files that match each other.  */
777      i0++, i1++;
778    }
779
780  return script;
781}
782
783/* Scan the tables of which lines are inserted and deleted,
784   producing an edit script in forward order.  */
785
786static struct change *
787build_script (filevec)
788     struct file_data const filevec[];
789{
790  struct change *script = 0;
791  char *changed0 = filevec[0].changed_flag;
792  char *changed1 = filevec[1].changed_flag;
793  int i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
794
795  /* Note that changedN[-1] does exist, and contains 0.  */
796
797  while (i0 >= 0 || i1 >= 0)
798    {
799      if (changed0[i0 - 1] || changed1[i1 - 1])
800        {
801          int line0 = i0, line1 = i1;
802
803          /* Find # lines changed here in each file.  */
804          while (changed0[i0 - 1]) --i0;
805          while (changed1[i1 - 1]) --i1;
806
807          /* Record this change.  */
808          script = add_change (i0, i1, line0 - i0, line1 - i1, script);
809        }
810
811      /* We have reached lines in the two files that match each other.  */
812      i0--, i1--;
813    }
814
815  return script;
816}
817
818/* If CHANGES, briefly report that two files differed.  */
819static void
820briefly_report (changes, filevec)
821     int changes;
822     struct file_data const filevec[];
823{
824  if (changes)
825    message (no_details_flag ? "Files %s and %s differ\n"
826             : "Binary files %s and %s differ\n",
827             filevec[0].name, filevec[1].name);
828}
829
830/* Report the differences of two files.  DEPTH is the current directory
831   depth. */
832int
833diff_2_files (filevec, depth)
834     struct file_data filevec[];
835     int depth;
836{
837  int diags;
838  int i;
839  struct change *e, *p;
840  struct change *script;
841  int changes;
842
843
844  /* If we have detected that either file is binary,
845     compare the two files as binary.  This can happen
846     only when the first chunk is read.
847     Also, --brief without any --ignore-* options means
848     we can speed things up by treating the files as binary.  */
849
850  if (read_files (filevec, no_details_flag & ~ignore_some_changes))
851    {
852      /* Files with different lengths must be different.  */
853      if (filevec[0].stat.st_size != filevec[1].stat.st_size
854          && (filevec[0].desc < 0 || S_ISREG (filevec[0].stat.st_mode))
855          && (filevec[1].desc < 0 || S_ISREG (filevec[1].stat.st_mode)))
856        changes = 1;
857
858      /* Standard input equals itself.  */
859      else if (filevec[0].desc == filevec[1].desc)
860        changes = 0;
861
862      else
863        /* Scan both files, a buffer at a time, looking for a difference.  */
864        {
865          /* Allocate same-sized buffers for both files.  */
866          size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat),
867                                           STAT_BLOCKSIZE (filevec[1].stat));
868          for (i = 0; i < 2; i++)
869            filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size);
870
871          for (;;  filevec[0].buffered_chars = filevec[1].buffered_chars = 0)
872            {
873              /* Read a buffer's worth from both files.  */
874              for (i = 0; i < 2; i++)
875                if (0 <= filevec[i].desc)
876                  while (filevec[i].buffered_chars != buffer_size)
877                    {
878                      int r = read (filevec[i].desc,
879                                    filevec[i].buffer
880                                    + filevec[i].buffered_chars,
881                                    buffer_size - filevec[i].buffered_chars);
882                      if (r == 0)
883                        break;
884                      if (r < 0)
885                        pfatal_with_name (filevec[i].name);
886                      filevec[i].buffered_chars += r;
887                    }
888
889              /* If the buffers differ, the files differ.  */
890              if (filevec[0].buffered_chars != filevec[1].buffered_chars
891                  || (filevec[0].buffered_chars != 0
892                      && memcmp (filevec[0].buffer,
893                                 filevec[1].buffer,
894                                 filevec[0].buffered_chars) != 0))
895                {
896                  changes = 1;
897                  break;
898                }
899
900              /* If we reach end of file, the files are the same.  */
901              if (filevec[0].buffered_chars != buffer_size)
902                {
903                  changes = 0;
904                  break;
905                }
906            }
907        }
908
909      briefly_report (changes, filevec);
910    }
911  else
912    {
913      /* Allocate vectors for the results of comparison:
914         a flag for each line of each file, saying whether that line
915         is an insertion or deletion.
916         Allocate an extra element, always zero, at each end of each vector.  */
917
918      size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4;
919      filevec[0].changed_flag = xmalloc (s);
920      bzero (filevec[0].changed_flag, s);
921      filevec[0].changed_flag++;
922      filevec[1].changed_flag = filevec[0].changed_flag
923                                + filevec[0].buffered_lines + 2;
924
925      /* Some lines are obviously insertions or deletions
926         because they don't match anything.  Detect them now, and
927         avoid even thinking about them in the main comparison algorithm.  */
928
929      discard_confusing_lines (filevec);
930
931      /* Now do the main comparison algorithm, considering just the
932         undiscarded lines.  */
933
934      xvec = filevec[0].undiscarded;
935      yvec = filevec[1].undiscarded;
936      diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
937      fdiag = (int *) xmalloc (diags * (2 * sizeof (int)));
938      bdiag = fdiag + diags;
939      fdiag += filevec[1].nondiscarded_lines + 1;
940      bdiag += filevec[1].nondiscarded_lines + 1;
941
942      /* Set TOO_EXPENSIVE to be approximate square root of input size,
943         bounded below by 256.  */
944      too_expensive = 1;
945      for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines;
946           i != 0; i >>= 2)
947        too_expensive <<= 1;
948      too_expensive = max (256, too_expensive);
949
950      files[0] = filevec[0];
951      files[1] = filevec[1];
952
953      compareseq (0, filevec[0].nondiscarded_lines,
954                  0, filevec[1].nondiscarded_lines, no_discards);
955
956      free (fdiag - (filevec[1].nondiscarded_lines + 1));
957
958      /* Modify the results slightly to make them prettier
959         in cases where that can validly be done.  */
960
961      shift_boundaries (filevec);
962
963      /* Get the results of comparison in the form of a chain
964         of `struct change's -- an edit script.  */
965
966      if (output_style == OUTPUT_ED)
967        script = build_reverse_script (filevec);
968      else
969        script = build_script (filevec);
970
971      /* Set CHANGES if we had any diffs.
972         If some changes are ignored, we must scan the script to decide.  */
973      if (ignore_blank_lines_flag || ignore_regexp_list)
974        {
975          struct change *next = script;
976          changes = 0;
977
978          while (next && changes == 0)
979            {
980              struct change *this, *end;
981              int first0, last0, first1, last1, deletes, inserts;
982
983              /* Find a set of changes that belong together.  */
984              this = next;
985              end = find_change (next);
986
987              /* Disconnect them from the rest of the changes, making them
988                 a hunk, and remember the rest for next iteration.  */
989              next = end->link;
990              end->link = 0;
991
992              /* Determine whether this hunk is really a difference.  */
993              analyze_hunk (this, &first0, &last0, &first1, &last1,
994                            &deletes, &inserts);
995
996              /* Reconnect the script so it will all be freed properly.  */
997              end->link = next;
998
999              if (deletes || inserts)
1000                changes = 1;
1001            }
1002        }
1003      else
1004        changes = (script != 0);
1005
1006      if (no_details_flag)
1007        briefly_report (changes, filevec);
1008      else
1009        {
1010          if (changes || ! no_diff_means_no_output)
1011            {
1012              /* Record info for starting up output,
1013                 to be used if and when we have some output to print.  */
1014              setup_output (files[0].name, files[1].name, depth);
1015
1016              switch (output_style)
1017                {
1018                case OUTPUT_CONTEXT:
1019                  print_context_script (script, 0);
1020                  break;
1021
1022                case OUTPUT_UNIFIED:
1023                  print_context_script (script, 1);
1024                  break;
1025
1026                case OUTPUT_ED:
1027                  print_ed_script (script);
1028                  break;
1029
1030                case OUTPUT_FORWARD_ED:
1031                  pr_forward_ed_script (script);
1032                  break;
1033
1034                case OUTPUT_RCS:
1035                  print_rcs_script (script);
1036                  break;
1037
1038                case OUTPUT_NORMAL:
1039                  print_normal_script (script);
1040                  break;
1041
1042                case OUTPUT_IFDEF:
1043                  print_ifdef_script (script);
1044                  break;
1045
1046                case OUTPUT_SDIFF:
1047                  print_sdiff_script (script);
1048                }
1049
1050              finish_output ();
1051            }
1052        }
1053
1054      free (filevec[0].undiscarded);
1055
1056      free (filevec[0].changed_flag - 1);
1057
1058      for (i = 1; i >= 0; --i)
1059        free (filevec[i].equivs);
1060
1061      for (i = 0; i < 2; ++i)
1062        free (filevec[i].linbuf + filevec[i].linbuf_base);
1063
1064      for (e = script; e; e = p)
1065        {
1066          p = e->link;
1067          free (e);
1068        }
1069
1070      if (! ROBUST_OUTPUT_STYLE (output_style))
1071        for (i = 0; i < 2; ++i)
1072          if (filevec[i].missing_newline)
1073            {
1074              error ("No newline at end of file %s", filevec[i].name, "");
1075              changes = 2;
1076            }
1077    }
1078
1079  if (filevec[0].buffer != filevec[1].buffer)
1080    free (filevec[0].buffer);
1081  free (filevec[1].buffer);
1082
1083  return changes;
1084}
Note: See TracBrowser for help on using the repository browser.