1 | : Use /bin/sh |
---|
2 | # |
---|
3 | # $Id: zapdups.X,v 1.1.1.1 1997-09-03 21:08:13 ghudson Exp $ |
---|
4 | # |
---|
5 | # Copyright 1993, Geoff Kuenning, Granada Hills, CA |
---|
6 | # All rights reserved. |
---|
7 | # |
---|
8 | # Redistribution and use in source and binary forms, with or without |
---|
9 | # modification, are permitted provided that the following conditions |
---|
10 | # are met: |
---|
11 | # |
---|
12 | # 1. Redistributions of source code must retain the above copyright |
---|
13 | # notice, this list of conditions and the following disclaimer. |
---|
14 | # 2. Redistributions in binary form must reproduce the above copyright |
---|
15 | # notice, this list of conditions and the following disclaimer in the |
---|
16 | # documentation and/or other materials provided with the distribution. |
---|
17 | # 3. All modifications to the source code must be clearly marked as |
---|
18 | # such. Binary redistributions based on modified source code |
---|
19 | # must be clearly marked as modified versions in the documentation |
---|
20 | # and/or other materials provided with the distribution. |
---|
21 | # 4. All advertising materials mentioning features or use of this software |
---|
22 | # must display the following acknowledgment: |
---|
23 | # This product includes software developed by Geoff Kuenning and |
---|
24 | # other unpaid contributors. |
---|
25 | # 5. The name of Geoff Kuenning may not be used to endorse or promote |
---|
26 | # products derived from this software without specific prior |
---|
27 | # written permission. |
---|
28 | # |
---|
29 | # THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
---|
30 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
31 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
32 | # ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
---|
33 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
34 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
35 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
36 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
37 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
38 | # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
39 | # SUCH DAMAGE. |
---|
40 | # |
---|
41 | # Report or get rid of duplicates in various components of a dictionary. |
---|
42 | # |
---|
43 | # Usage: |
---|
44 | # |
---|
45 | # zapdups [-d [-n]] [-l langfile] dict-0 dict-1 ... |
---|
46 | # |
---|
47 | # Dictionaries starting with dict-1 (not dict-0!) are examined, |
---|
48 | # looking for words that appear in any earlier dictionary. If any |
---|
49 | # duplicates are found, they are reported to the standard output. |
---|
50 | # |
---|
51 | # If the -d switch is specified, duplicates are removed from later |
---|
52 | # dictionaries. The modification is done in-place. This switch |
---|
53 | # should normally be used after examining the output of an earlier |
---|
54 | # run. The -d switch takes a long time to run, because it uses |
---|
55 | # munchlist to reduce the dictionary once duplicates are removed. |
---|
56 | # The -n switch can be used to suppress the running of munchlist, to |
---|
57 | # save time. |
---|
58 | # |
---|
59 | # If the -l switch is specified, the language tables are taken from |
---|
60 | # the specified file; otherwise they come from $LIBDIR/!!DEFLANG!!. |
---|
61 | # |
---|
62 | # $Log: not supported by cvs2svn $ |
---|
63 | # Revision 1.6 1995/01/08 23:23:58 geoff |
---|
64 | # Support variable hashfile suffixes for DOS purposes. |
---|
65 | # |
---|
66 | # Revision 1.5 1994/01/25 07:12:24 geoff |
---|
67 | # Get rid of all old RCS log lines in preparation for the 3.1 release. |
---|
68 | # |
---|
69 | # |
---|
# Installation settings.  Tokens of the form !!NAME!! look like
# placeholders filled in when this template is installed (TODO confirm
# against the build rules); they are deliberately left untouched.
LIBDIR=!!LIBDIR!!
# Scratch directory.  ":-" rather than "-" so that a TMPDIR which is set
# but empty also falls back to the default instead of putting the scratch
# files directly under "/".
TDIR=${TMPDIR:-/usr/tmp}
# Common prefix of every scratch file; $$ is this shell's process ID.
TMP=${TDIR}/zd$$
# Option handed to sort so that it uses the same scratch directory.  It is
# expanded unquoted later on, so it splits into two words on purpose.
SORTTMP="-T ${TDIR}" # !!SORTTMP!!
USAGE="zapdups [-d [-n]] [-l langfile] dict-0 dict-1 ..."
---|
75 | |
---|
# Option defaults:
#   delete   - "yes" once -d is seen: duplicates are removed, not just listed
#   munchit  - "no" once -n is seen: skip munchlist when rewriting
#   langtabs - language table file, overridden by -l
delete=no
munchit=yes
langtabs=${LIBDIR}/!!DEFLANG!!

# Consume leading switches; stop at the first argument that is not one.
while [ $# -gt 0 ]
do
    case "$1" in
	-d)
	    delete=yes
	    shift
	    ;;
	-l)
	    # -l needs a value.  Check before the double shift: shifting
	    # past the end of the argument list is a fatal error in some
	    # shells and would hide the usage message.
	    if [ $# -lt 2 ]
	    then
		echo "$USAGE" 1>&2
		exit 1
	    fi
	    langtabs="$2"
	    shift; shift
	    ;;
	-n)
	    munchit=no
	    shift
	    ;;
	-*)
	    # Any other switch is an error.
	    echo "$USAGE" 1>&2
	    exit 1
	    ;;
	*)
	    break
	    ;;
    esac
done
---|
103 | |
---|
# At least two dictionaries (dict-0 and dict-1) must remain after the
# switches; otherwise print the usage line on stderr and give up.
[ $# -ge 2 ] || {
    echo "$USAGE" 1>&2
    exit 1
}
---|
109 | |
---|
# Scratch files.  They all share the $TMP prefix, so a single "$TMP.*"
# glob removes every one of them.
FAKEDICT=${TMP}.b			# one-word dummy dictionary
FAKEHASH=${TMP}.a!!HASHSUFFIX!!		# hash file built from it
SEEN=${TMP}.c				# tagged words from earlier dictionaries
LATEST=${TMP}.d				# tagged words from the current dictionary
DUPS=${TMP}.e				# join of the two lists

# Remove the scratch files when interrupted.  The command text is expanded
# now, while the traps are being set.  Signals 1, 2 and 15 end the script
# with status 1; signal 13 ends it with status 0.
zap="rm -f ${TMP}.*"
trap "${zap}; exit 1" 1 2 15
trap "${zap}; exit 0" 13
---|
118 | |
---|
#
# Compile the language tables into a scratch hash file.  buildhash is
# given a dummy dictionary holding the single word QQQQQQQQ; the
# resulting hash file is what ispell is pointed at (with -d) further down.
#
echo 'QQQQQQQQ' > $FAKEDICT
# $langtabs is quoted because it may come straight from the user's -l
# argument and so may contain blanks or glob characters.
buildhash -s $FAKEDICT "$langtabs" $FAKEHASH || {
    echo "Couldn't create fake hash file" 1>&2
    rm -f $TMP.*
    exit 1
}
# The dummy dictionary is no longer needed; the trailing "*" also removes
# any other file whose name starts with the dummy dictionary's name.
rm -f ${FAKEDICT}*
---|
128 | |
---|
# A literal newline, used as the replacement character for tr.
nl='
'
#
# Dictionary 0 is never checked itself; it only seeds the list of words
# already seen.  Expand it, put every word on its own line, sort it
# without repeats, and tag each word with the dictionary's name.
#
first=$1
shift
ispell -e -d $FAKEHASH < "$first" | tr ' ' "$nl" | sort $SORTTMP -u \
  | sed 's@$@ '"$first@" > $SEEN
---|
140 | |
---|
#
# Walk the remaining dictionaries in order.  For each one:
#
#   (1) Expand it into a temp file, tagging every word with the
#       dictionary's name.
#   (2) Join it against the words already seen; any matches are the
#       duplicates, and they are written to the loop's output.
#   (3) With -d, use comm to strip the duplicates and rewrite the
#       dictionary in place (through munchlist unless -n was given).
#   (4) Unless this was the last dictionary, merge the expanded list
#       into the list of words already seen.
#
# Everything the loop prints goes through one final "sort -u".
#
for later in "$@"
do
    ispell -e -d $FAKEHASH < "$later" | tr ' ' "$nl" | sort $SORTTMP -u \
      | sed 's@$@ '"$later@" > $LATEST
    join '-t ' $SEEN $LATEST > $DUPS
    if [ -s $DUPS ]; then
        cat $DUPS
        case $delete in
            yes)
                # The first sed cuts each joined line down to the form
                # its counterpart has in $LATEST; comm -23 then keeps
                # only the $LATEST lines that are not duplicates, and
                # the second sed strips the dictionary-name tag again.
                sed "s@ .* $later@ $later@" $DUPS \
                  | comm -23 $LATEST - \
                  | sed "s@ $later@@" \
                  | case $munchit in
                        yes) munchlist -l "$langtabs" > "$later" ;;
                        *)   sort $SORTTMP -u -o "$later" ;;
                    esac
                ;;
        esac
    fi
    # Keep $# equal to the number of dictionaries still to come.
    shift
    [ $# -eq 0 ] || sort $SORTTMP -u -o $SEEN $LATEST $SEEN
done | sort -u
rm -f $TMP.*
---|