1 | /* Functions to make fuzzy comparisons between strings |
---|
2 | Copyright (C) 1988-1989, 1992-1993, 1995, 2001 Free Software Foundation, Inc. |
---|
3 | |
---|
4 | This program is free software; you can redistribute it and/or modify |
---|
5 | it under the terms of the GNU General Public License as published by |
---|
6 | the Free Software Foundation; either version 2 of the License, or (at |
---|
7 | your option) any later version. |
---|
8 | |
---|
9 | This program is distributed in the hope that it will be useful, but |
---|
10 | WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
12 | General Public License for more details. |
---|
13 | |
---|
14 | You should have received a copy of the GNU General Public License |
---|
15 | along with this program; if not, write to the Free Software |
---|
16 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
17 | |
---|
18 | |
---|
19 | Derived from GNU diff 2.7, analyze.c et al. |
---|
20 | |
---|
21 | The basic algorithm is described in: |
---|
22 | "An O(ND) Difference Algorithm and its Variations", Eugene Myers, |
---|
23 | Algorithmica Vol. 1 No. 2, 1986, pp. 251-266; |
---|
24 | see especially section 4.2, which describes the variation used below. |
---|
25 | |
---|
26 | The basic algorithm was independently discovered as described in: |
---|
27 | "Algorithms for Approximate String Matching", E. Ukkonen, |
---|
28 | Information and Control Vol. 64, 1985, pp. 100-118. |
---|
29 | |
---|
30 | Modified to work on strings rather than files |
---|
31 | by Peter Miller <pmiller@agso.gov.au>, October 1995 */ |
---|
32 | |
---|
33 | #ifdef HAVE_CONFIG_H |
---|
34 | # include "config.h" |
---|
35 | #endif |
---|
36 | |
---|
37 | #include <string.h> |
---|
38 | #include <stdio.h> |
---|
39 | |
---|
40 | #ifdef HAVE_LIMITS_H |
---|
41 | # include <limits.h> |
---|
42 | #else |
---|
43 | # define INT_MAX ((int)(~(unsigned)0 >> 1)) |
---|
44 | #endif |
---|
45 | |
---|
46 | #include "system.h" |
---|
47 | #include "fstrcmp.h" |
---|
48 | |
---|
49 | |
---|
50 | /* |
---|
51 | * Data on one input string being compared. |
---|
52 | */ |
---|
53 | struct string_data |
---|
54 | { |
---|
55 | /* The string to be compared. */ |
---|
56 | const char *data; |
---|
57 | |
---|
58 | /* The length of the string to be compared. */ |
---|
59 | int data_length; |
---|
60 | |
---|
61 | /* The number of characters inserted or deleted. */ |
---|
62 | int edit_count; |
---|
63 | }; |
---|
64 | |
---|
65 | static struct string_data string[2]; |
---|
66 | |
---|
67 | |
---|
68 | #ifdef MINUS_H_FLAG |
---|
69 | |
---|
70 | /* This corresponds to the diff -H flag. With this heuristic, for |
---|
71 | strings with a constant small density of changes, the algorithm is |
---|
72 | linear in the strings size. This is unlikely in typical uses of |
---|
73 | fstrcmp, and so is usually compiled out. Besides, there is no |
---|
74 | interface to set it true. */ |
---|
75 | static int heuristic; |
---|
76 | |
---|
77 | #endif |
---|
78 | |
---|
79 | |
---|
80 | /* Vector, indexed by diagonal, containing 1 + the X coordinate of the |
---|
81 | point furthest along the given diagonal in the forward search of the |
---|
82 | edit matrix. */ |
---|
83 | static int *fdiag; |
---|
84 | |
---|
85 | /* Vector, indexed by diagonal, containing the X coordinate of the point |
---|
86 | furthest along the given diagonal in the backward search of the edit |
---|
87 | matrix. */ |
---|
88 | static int *bdiag; |
---|
89 | |
---|
90 | /* Edit scripts longer than this are too expensive to compute. */ |
---|
91 | static int too_expensive; |
---|
92 | |
---|
93 | /* Snakes bigger than this are considered `big'. */ |
---|
94 | #define SNAKE_LIMIT 20 |
---|
95 | |
---|
96 | struct partition |
---|
97 | { |
---|
98 | /* Midpoints of this partition. */ |
---|
99 | int xmid, ymid; |
---|
100 | |
---|
101 | /* Nonzero if low half will be analyzed minimally. */ |
---|
102 | int lo_minimal; |
---|
103 | |
---|
104 | /* Likewise for high half. */ |
---|
105 | int hi_minimal; |
---|
106 | }; |
---|
107 | |
---|
108 | |
---|
109 | /* NAME |
---|
110 | diag - find diagonal path |
---|
111 | |
---|
112 | SYNOPSIS |
---|
113 | int diag(int xoff, int xlim, int yoff, int ylim, int minimal, |
---|
114 | struct partition *part); |
---|
115 | |
---|
116 | DESCRIPTION |
---|
117 | Find the midpoint of the shortest edit script for a specified |
---|
118 | portion of the two strings. |
---|
119 | |
---|
120 | Scan from the beginnings of the strings, and simultaneously from |
---|
121 | the ends, doing a breadth-first search through the space of |
---|
122 | edit-sequence. When the two searches meet, we have found the |
---|
123 | midpoint of the shortest edit sequence. |
---|
124 | |
---|
125 | If MINIMAL is nonzero, find the minimal edit script regardless |
---|
126 | of expense. Otherwise, if the search is too expensive, use |
---|
127 | heuristics to stop the search and report a suboptimal answer. |
---|
128 | |
---|
129 | RETURNS |
---|
130 | Set PART->(XMID,YMID) to the midpoint (XMID,YMID). The diagonal |
---|
131 | number XMID - YMID equals the number of inserted characters |
---|
132 | minus the number of deleted characters (counting only characters |
---|
133 | before the midpoint). Return the approximate edit cost; this is |
---|
134 | the total number of characters inserted or deleted (counting |
---|
135 | only characters before the midpoint), unless a heuristic is used |
---|
136 | to terminate the search prematurely. |
---|
137 | |
---|
138 | Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script |
---|
139 | for the left half of the partition is known; similarly for |
---|
140 | PART->RIGHT_MINIMAL. |
---|
141 | |
---|
142 | CAVEAT |
---|
143 | This function assumes that the first characters of the specified |
---|
144 | portions of the two strings do not match, and likewise that the |
---|
145 | last characters do not match. The caller must trim matching |
---|
146 | characters from the beginning and end of the portions it is |
---|
147 | going to specify. |
---|
148 | |
---|
149 | If we return the "wrong" partitions, the worst this can do is |
---|
150 | cause suboptimal diff output. It cannot cause incorrect diff |
---|
151 | output. */ |
---|
152 | |
---|
153 | static int diag PARAMS ((int, int, int, int, int, struct partition *)); |
---|
154 | |
---|
155 | static int |
---|
156 | diag (xoff, xlim, yoff, ylim, minimal, part) |
---|
157 | int xoff; |
---|
158 | int xlim; |
---|
159 | int yoff; |
---|
160 | int ylim; |
---|
161 | int minimal; |
---|
162 | struct partition *part; |
---|
163 | { |
---|
164 | int *const fd = fdiag; /* Give the compiler a chance. */ |
---|
165 | int *const bd = bdiag; /* Additional help for the compiler. */ |
---|
166 | const char *const xv = string[0].data; /* Still more help for the compiler. */ |
---|
167 | const char *const yv = string[1].data; /* And more and more . . . */ |
---|
168 | const int dmin = xoff - ylim; /* Minimum valid diagonal. */ |
---|
169 | const int dmax = xlim - yoff; /* Maximum valid diagonal. */ |
---|
170 | const int fmid = xoff - yoff; /* Center diagonal of top-down search. */ |
---|
171 | const int bmid = xlim - ylim; /* Center diagonal of bottom-up search. */ |
---|
172 | int fmin = fmid; |
---|
173 | int fmax = fmid; /* Limits of top-down search. */ |
---|
174 | int bmin = bmid; |
---|
175 | int bmax = bmid; /* Limits of bottom-up search. */ |
---|
176 | int c; /* Cost. */ |
---|
177 | int odd = (fmid - bmid) & 1; |
---|
178 | |
---|
179 | /* |
---|
180 | * True if southeast corner is on an odd diagonal with respect |
---|
181 | * to the northwest. |
---|
182 | */ |
---|
183 | fd[fmid] = xoff; |
---|
184 | bd[bmid] = xlim; |
---|
185 | for (c = 1;; ++c) |
---|
186 | { |
---|
187 | int d; /* Active diagonal. */ |
---|
188 | int big_snake; |
---|
189 | |
---|
190 | big_snake = 0; |
---|
191 | /* Extend the top-down search by an edit step in each diagonal. */ |
---|
192 | if (fmin > dmin) |
---|
193 | fd[--fmin - 1] = -1; |
---|
194 | else |
---|
195 | ++fmin; |
---|
196 | if (fmax < dmax) |
---|
197 | fd[++fmax + 1] = -1; |
---|
198 | else |
---|
199 | --fmax; |
---|
200 | for (d = fmax; d >= fmin; d -= 2) |
---|
201 | { |
---|
202 | int x; |
---|
203 | int y; |
---|
204 | int oldx; |
---|
205 | int tlo; |
---|
206 | int thi; |
---|
207 | |
---|
208 | tlo = fd[d - 1], |
---|
209 | thi = fd[d + 1]; |
---|
210 | |
---|
211 | if (tlo >= thi) |
---|
212 | x = tlo + 1; |
---|
213 | else |
---|
214 | x = thi; |
---|
215 | oldx = x; |
---|
216 | y = x - d; |
---|
217 | while (x < xlim && y < ylim && xv[x] == yv[y]) |
---|
218 | { |
---|
219 | ++x; |
---|
220 | ++y; |
---|
221 | } |
---|
222 | if (x - oldx > SNAKE_LIMIT) |
---|
223 | big_snake = 1; |
---|
224 | fd[d] = x; |
---|
225 | if (odd && bmin <= d && d <= bmax && bd[d] <= x) |
---|
226 | { |
---|
227 | part->xmid = x; |
---|
228 | part->ymid = y; |
---|
229 | part->lo_minimal = part->hi_minimal = 1; |
---|
230 | return 2 * c - 1; |
---|
231 | } |
---|
232 | } |
---|
233 | /* Similarly extend the bottom-up search. */ |
---|
234 | if (bmin > dmin) |
---|
235 | bd[--bmin - 1] = INT_MAX; |
---|
236 | else |
---|
237 | ++bmin; |
---|
238 | if (bmax < dmax) |
---|
239 | bd[++bmax + 1] = INT_MAX; |
---|
240 | else |
---|
241 | --bmax; |
---|
242 | for (d = bmax; d >= bmin; d -= 2) |
---|
243 | { |
---|
244 | int x; |
---|
245 | int y; |
---|
246 | int oldx; |
---|
247 | int tlo; |
---|
248 | int thi; |
---|
249 | |
---|
250 | tlo = bd[d - 1], |
---|
251 | thi = bd[d + 1]; |
---|
252 | if (tlo < thi) |
---|
253 | x = tlo; |
---|
254 | else |
---|
255 | x = thi - 1; |
---|
256 | oldx = x; |
---|
257 | y = x - d; |
---|
258 | while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) |
---|
259 | { |
---|
260 | --x; |
---|
261 | --y; |
---|
262 | } |
---|
263 | if (oldx - x > SNAKE_LIMIT) |
---|
264 | big_snake = 1; |
---|
265 | bd[d] = x; |
---|
266 | if (!odd && fmin <= d && d <= fmax && x <= fd[d]) |
---|
267 | { |
---|
268 | part->xmid = x; |
---|
269 | part->ymid = y; |
---|
270 | part->lo_minimal = part->hi_minimal = 1; |
---|
271 | return 2 * c; |
---|
272 | } |
---|
273 | } |
---|
274 | |
---|
275 | if (minimal) |
---|
276 | continue; |
---|
277 | |
---|
278 | #ifdef MINUS_H_FLAG |
---|
279 | /* Heuristic: check occasionally for a diagonal that has made lots |
---|
280 | of progress compared with the edit distance. If we have any |
---|
281 | such, find the one that has made the most progress and return |
---|
282 | it as if it had succeeded. |
---|
283 | |
---|
284 | With this heuristic, for strings with a constant small density |
---|
285 | of changes, the algorithm is linear in the strings size. */ |
---|
286 | if (c > 200 && big_snake && heuristic) |
---|
287 | { |
---|
288 | int best; |
---|
289 | |
---|
290 | best = 0; |
---|
291 | for (d = fmax; d >= fmin; d -= 2) |
---|
292 | { |
---|
293 | int dd; |
---|
294 | int x; |
---|
295 | int y; |
---|
296 | int v; |
---|
297 | |
---|
298 | dd = d - fmid; |
---|
299 | x = fd[d]; |
---|
300 | y = x - d; |
---|
301 | v = (x - xoff) * 2 - dd; |
---|
302 | |
---|
303 | if (v > 12 * (c + (dd < 0 ? -dd : dd))) |
---|
304 | { |
---|
305 | if |
---|
306 | ( |
---|
307 | v > best |
---|
308 | && |
---|
309 | xoff + SNAKE_LIMIT <= x |
---|
310 | && |
---|
311 | x < xlim |
---|
312 | && |
---|
313 | yoff + SNAKE_LIMIT <= y |
---|
314 | && |
---|
315 | y < ylim |
---|
316 | ) |
---|
317 | { |
---|
318 | /* We have a good enough best diagonal; now insist |
---|
319 | that it end with a significant snake. */ |
---|
320 | int k; |
---|
321 | |
---|
322 | for (k = 1; xv[x - k] == yv[y - k]; k++) |
---|
323 | { |
---|
324 | if (k == SNAKE_LIMIT) |
---|
325 | { |
---|
326 | best = v; |
---|
327 | part->xmid = x; |
---|
328 | part->ymid = y; |
---|
329 | break; |
---|
330 | } |
---|
331 | } |
---|
332 | } |
---|
333 | } |
---|
334 | } |
---|
335 | if (best > 0) |
---|
336 | { |
---|
337 | part->lo_minimal = 1; |
---|
338 | part->hi_minimal = 0; |
---|
339 | return 2 * c - 1; |
---|
340 | } |
---|
341 | best = 0; |
---|
342 | for (d = bmax; d >= bmin; d -= 2) |
---|
343 | { |
---|
344 | int dd; |
---|
345 | int x; |
---|
346 | int y; |
---|
347 | int v; |
---|
348 | |
---|
349 | dd = d - bmid; |
---|
350 | x = bd[d]; |
---|
351 | y = x - d; |
---|
352 | v = (xlim - x) * 2 + dd; |
---|
353 | |
---|
354 | if (v > 12 * (c + (dd < 0 ? -dd : dd))) |
---|
355 | { |
---|
356 | if (v > best && xoff < x && x <= xlim - SNAKE_LIMIT && |
---|
357 | yoff < y && y <= ylim - SNAKE_LIMIT) |
---|
358 | { |
---|
359 | /* We have a good enough best diagonal; now insist |
---|
360 | that it end with a significant snake. */ |
---|
361 | int k; |
---|
362 | |
---|
363 | for (k = 0; xv[x + k] == yv[y + k]; k++) |
---|
364 | { |
---|
365 | if (k == SNAKE_LIMIT - 1) |
---|
366 | { |
---|
367 | best = v; |
---|
368 | part->xmid = x; |
---|
369 | part->ymid = y; |
---|
370 | break; |
---|
371 | } |
---|
372 | } |
---|
373 | } |
---|
374 | } |
---|
375 | } |
---|
376 | if (best > 0) |
---|
377 | { |
---|
378 | part->lo_minimal = 0; |
---|
379 | part->hi_minimal = 1; |
---|
380 | return 2 * c - 1; |
---|
381 | } |
---|
382 | } |
---|
383 | #endif /* MINUS_H_FLAG */ |
---|
384 | |
---|
385 | /* Heuristic: if we've gone well beyond the call of duty, give up |
---|
386 | and report halfway between our best results so far. */ |
---|
387 | if (c >= too_expensive) |
---|
388 | { |
---|
389 | int fxybest; |
---|
390 | int fxbest; |
---|
391 | int bxybest; |
---|
392 | int bxbest; |
---|
393 | |
---|
394 | /* Pacify `gcc -Wall'. */ |
---|
395 | fxbest = 0; |
---|
396 | bxbest = 0; |
---|
397 | |
---|
398 | /* Find forward diagonal that maximizes X + Y. */ |
---|
399 | fxybest = -1; |
---|
400 | for (d = fmax; d >= fmin; d -= 2) |
---|
401 | { |
---|
402 | int x; |
---|
403 | int y; |
---|
404 | |
---|
405 | x = fd[d] < xlim ? fd[d] : xlim; |
---|
406 | y = x - d; |
---|
407 | |
---|
408 | if (ylim < y) |
---|
409 | { |
---|
410 | x = ylim + d; |
---|
411 | y = ylim; |
---|
412 | } |
---|
413 | if (fxybest < x + y) |
---|
414 | { |
---|
415 | fxybest = x + y; |
---|
416 | fxbest = x; |
---|
417 | } |
---|
418 | } |
---|
419 | /* Find backward diagonal that minimizes X + Y. */ |
---|
420 | bxybest = INT_MAX; |
---|
421 | for (d = bmax; d >= bmin; d -= 2) |
---|
422 | { |
---|
423 | int x; |
---|
424 | int y; |
---|
425 | |
---|
426 | x = xoff > bd[d] ? xoff : bd[d]; |
---|
427 | y = x - d; |
---|
428 | |
---|
429 | if (y < yoff) |
---|
430 | { |
---|
431 | x = yoff + d; |
---|
432 | y = yoff; |
---|
433 | } |
---|
434 | if (x + y < bxybest) |
---|
435 | { |
---|
436 | bxybest = x + y; |
---|
437 | bxbest = x; |
---|
438 | } |
---|
439 | } |
---|
440 | /* Use the better of the two diagonals. */ |
---|
441 | if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) |
---|
442 | { |
---|
443 | part->xmid = fxbest; |
---|
444 | part->ymid = fxybest - fxbest; |
---|
445 | part->lo_minimal = 1; |
---|
446 | part->hi_minimal = 0; |
---|
447 | } |
---|
448 | else |
---|
449 | { |
---|
450 | part->xmid = bxbest; |
---|
451 | part->ymid = bxybest - bxbest; |
---|
452 | part->lo_minimal = 0; |
---|
453 | part->hi_minimal = 1; |
---|
454 | } |
---|
455 | return 2 * c - 1; |
---|
456 | } |
---|
457 | } |
---|
458 | } |
---|
459 | |
---|
460 | |
---|
461 | /* NAME |
---|
462 | compareseq - find edit sequence |
---|
463 | |
---|
464 | SYNOPSIS |
---|
465 | void compareseq(int xoff, int xlim, int yoff, int ylim, int minimal); |
---|
466 | |
---|
467 | DESCRIPTION |
---|
468 | Compare in detail contiguous subsequences of the two strings |
---|
469 | which are known, as a whole, to match each other. |
---|
470 | |
---|
471 | The subsequence of string 0 is [XOFF, XLIM) and likewise for |
---|
472 | string 1. |
---|
473 | |
---|
474 | Note that XLIM, YLIM are exclusive bounds. All character |
---|
475 | numbers are origin-0. |
---|
476 | |
---|
477 | If MINIMAL is nonzero, find a minimal difference no matter how |
---|
478 | expensive it is. */ |
---|
479 | |
---|
480 | static void compareseq PARAMS ((int, int, int, int, int)); |
---|
481 | |
---|
482 | static void |
---|
483 | compareseq (xoff, xlim, yoff, ylim, minimal) |
---|
484 | int xoff; |
---|
485 | int xlim; |
---|
486 | int yoff; |
---|
487 | int ylim; |
---|
488 | int minimal; |
---|
489 | { |
---|
490 | const char *const xv = string[0].data; /* Help the compiler. */ |
---|
491 | const char *const yv = string[1].data; |
---|
492 | |
---|
493 | /* Slide down the bottom initial diagonal. */ |
---|
494 | while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff]) |
---|
495 | { |
---|
496 | ++xoff; |
---|
497 | ++yoff; |
---|
498 | } |
---|
499 | |
---|
500 | /* Slide up the top initial diagonal. */ |
---|
501 | while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1]) |
---|
502 | { |
---|
503 | --xlim; |
---|
504 | --ylim; |
---|
505 | } |
---|
506 | |
---|
507 | /* Handle simple cases. */ |
---|
508 | if (xoff == xlim) |
---|
509 | { |
---|
510 | while (yoff < ylim) |
---|
511 | { |
---|
512 | ++string[1].edit_count; |
---|
513 | ++yoff; |
---|
514 | } |
---|
515 | } |
---|
516 | else if (yoff == ylim) |
---|
517 | { |
---|
518 | while (xoff < xlim) |
---|
519 | { |
---|
520 | ++string[0].edit_count; |
---|
521 | ++xoff; |
---|
522 | } |
---|
523 | } |
---|
524 | else |
---|
525 | { |
---|
526 | int c; |
---|
527 | struct partition part; |
---|
528 | |
---|
529 | /* Find a point of correspondence in the middle of the strings. */ |
---|
530 | c = diag (xoff, xlim, yoff, ylim, minimal, &part); |
---|
531 | if (c == 1) |
---|
532 | { |
---|
533 | #if 0 |
---|
534 | /* This should be impossible, because it implies that one of |
---|
535 | the two subsequences is empty, and that case was handled |
---|
536 | above without calling `diag'. Let's verify that this is |
---|
537 | true. */ |
---|
538 | abort (); |
---|
539 | #else |
---|
540 | /* The two subsequences differ by a single insert or delete; |
---|
541 | record it and we are done. */ |
---|
542 | if (part.xmid - part.ymid < xoff - yoff) |
---|
543 | ++string[1].edit_count; |
---|
544 | else |
---|
545 | ++string[0].edit_count; |
---|
546 | #endif |
---|
547 | } |
---|
548 | else |
---|
549 | { |
---|
550 | /* Use the partitions to split this problem into subproblems. */ |
---|
551 | compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal); |
---|
552 | compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal); |
---|
553 | } |
---|
554 | } |
---|
555 | } |
---|
556 | |
---|
557 | |
---|
558 | /* NAME |
---|
559 | fstrcmp - fuzzy string compare |
---|
560 | |
---|
561 | SYNOPSIS |
---|
562 | double fstrcmp(const char *, const char *); |
---|
563 | |
---|
564 | DESCRIPTION |
---|
565 | The fstrcmp function may be used to compare two string for |
---|
566 | similarity. It is very useful in reducing "cascade" or |
---|
567 | "secondary" errors in compilers or other situations where |
---|
568 | symbol tables occur. |
---|
569 | |
---|
570 | RETURNS |
---|
571 | double; 0 if the strings are entirly dissimilar, 1 if the |
---|
572 | strings are identical, and a number in between if they are |
---|
573 | similar. */ |
---|
574 | |
---|
575 | double |
---|
576 | fstrcmp (string1, string2) |
---|
577 | const char *string1; |
---|
578 | const char *string2; |
---|
579 | { |
---|
580 | int i; |
---|
581 | |
---|
582 | size_t fdiag_len; |
---|
583 | static int *fdiag_buf; |
---|
584 | static size_t fdiag_max; |
---|
585 | |
---|
586 | /* set the info for each string. */ |
---|
587 | string[0].data = string1; |
---|
588 | string[0].data_length = strlen (string1); |
---|
589 | string[1].data = string2; |
---|
590 | string[1].data_length = strlen (string2); |
---|
591 | |
---|
592 | /* short-circuit obvious comparisons */ |
---|
593 | if (string[0].data_length == 0 && string[1].data_length == 0) |
---|
594 | return 1.0; |
---|
595 | if (string[0].data_length == 0 || string[1].data_length == 0) |
---|
596 | return 0.0; |
---|
597 | |
---|
598 | /* Set TOO_EXPENSIVE to be approximate square root of input size, |
---|
599 | bounded below by 256. */ |
---|
600 | too_expensive = 1; |
---|
601 | for (i = string[0].data_length + string[1].data_length; i != 0; i >>= 2) |
---|
602 | too_expensive <<= 1; |
---|
603 | if (too_expensive < 256) |
---|
604 | too_expensive = 256; |
---|
605 | |
---|
606 | /* Because fstrcmp is typically called multiple times, while scanning |
---|
607 | symbol tables, etc, attempt to minimize the number of memory |
---|
608 | allocations performed. Thus, we use a static buffer for the |
---|
609 | diagonal vectors, and never free them. */ |
---|
610 | fdiag_len = string[0].data_length + string[1].data_length + 3; |
---|
611 | if (fdiag_len > fdiag_max) |
---|
612 | { |
---|
613 | fdiag_max = fdiag_len; |
---|
614 | fdiag_buf = xrealloc (fdiag_buf, fdiag_max * (2 * sizeof (int))); |
---|
615 | } |
---|
616 | fdiag = fdiag_buf + string[1].data_length + 1; |
---|
617 | bdiag = fdiag + fdiag_len; |
---|
618 | |
---|
619 | /* Now do the main comparison algorithm */ |
---|
620 | string[0].edit_count = 0; |
---|
621 | string[1].edit_count = 0; |
---|
622 | compareseq (0, string[0].data_length, 0, string[1].data_length, 0); |
---|
623 | |
---|
624 | /* The result is |
---|
625 | ((number of chars in common) / (average length of the strings)). |
---|
626 | This is admittedly biased towards finding that the strings are |
---|
627 | similar, however it does produce meaningful results. */ |
---|
628 | return ((double) (string[0].data_length + string[1].data_length - |
---|
629 | string[1].edit_count - string[0].edit_count) / (string[0].data_length |
---|
630 | + string[1].data_length)); |
---|
631 | } |
---|