1 | : Use /bin/sh |
---|
2 | # |
---|
3 | # $Id: tryaffix.X,v 1.1.1.1 1997-09-03 21:08:09 ghudson Exp $ |
---|
4 | # |
---|
5 | # Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA |
---|
6 | # All rights reserved. |
---|
7 | # |
---|
8 | # Redistribution and use in source and binary forms, with or without |
---|
9 | # modification, are permitted provided that the following conditions |
---|
10 | # are met: |
---|
11 | # |
---|
12 | # 1. Redistributions of source code must retain the above copyright |
---|
13 | # notice, this list of conditions and the following disclaimer. |
---|
14 | # 2. Redistributions in binary form must reproduce the above copyright |
---|
15 | # notice, this list of conditions and the following disclaimer in the |
---|
16 | # documentation and/or other materials provided with the distribution. |
---|
17 | # 3. All modifications to the source code must be clearly marked as |
---|
18 | # such. Binary redistributions based on modified source code |
---|
19 | # must be clearly marked as modified versions in the documentation |
---|
20 | # and/or other materials provided with the distribution. |
---|
21 | # 4. All advertising materials mentioning features or use of this software |
---|
22 | # must display the following acknowledgment: |
---|
23 | # This product includes software developed by Geoff Kuenning and |
---|
24 | # other unpaid contributors. |
---|
25 | # 5. The name of Geoff Kuenning may not be used to endorse or promote |
---|
26 | # products derived from this software without specific prior |
---|
27 | # written permission. |
---|
28 | # |
---|
29 | # THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
---|
30 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
31 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
32 | # ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
---|
33 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
34 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
35 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
36 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
37 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
38 | # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
39 | # SUCH DAMAGE. |
---|
40 | # |
---|
41 | # Try out affixes to see if they produce valid roots |
---|
42 | # |
---|
43 | # Usage: |
---|
44 | # |
---|
45 | # tryaffix [-p | -s] [-c] dict-file affix[+addition] ... |
---|
46 | # |
---|
47 | # The -p and -s flags specify whether prefixes or suffixes |
---|
48 | # are being tried; if neither is specified, suffixes are assumed. |
---|
49 | # |
---|
50 | # If the -c flag is given, statistics on the various affixes are given: |
---|
51 | # a count of words it potentially applies to, and an estimate of the |
---|
52 | # number of dictionary bytes the flag would save. The estimate will |
---|
53 | # be high if the flag generates words that are currently generated |
---|
54 | # by other flags. |
---|
55 | # |
---|
56 | # The dictionary file, dict-file, must already be expanded and sorted, |
---|
57 | # and things will work best if uppercase has been folded to lower with |
---|
58 | # 'tr'. |
---|
59 | # |
---|
60 | # The "affixes" are things to be stripped from the dictionary |
---|
61 | # file to produce trial roots: for English, "con" and "ing" |
---|
62 | # are examples. The "additions" are letters that would have |
---|
63 | # been stripped off the root before adding the affix. For |
---|
64 | # example, the affix "ing" strips "e" for words ending in "e" |
---|
65 | # (as in "like --> liking") so we might run: |
---|
66 | # |
---|
67 | # tryaffix dict-file ing ing+e |
---|
68 | # |
---|
69 | # to cover both cases. |
---|
70 | # |
---|
71 | # $Log: not supported by cvs2svn $ |
---|
72 | # Revision 1.6 1994/01/25 07:12:18 geoff |
---|
73 | # Get rid of all old RCS log lines in preparation for the 3.1 release. |
---|
74 | # |
---|
75 | # |
---|
# Extra sort(1) arguments naming the directory for temporary files:
# $TMPDIR when it is set and non-empty, otherwise /usr/tmp.  Using ":-"
# rather than "-" keeps an empty TMPDIR from producing a bare "-T" with
# no directory after it.  The value holds an option and its argument in
# one string, so it has to be expanded unquoted to split into two words.
# NOTE(review): the !!SORTTMP!! tag looks like an install-time
# substitution marker -- confirm against the build before touching it.
SORTTMP="-T ${TMPDIR:-/usr/tmp}" # !!SORTTMP!!
USAGE='tryaffix [-p | -s] [-c] dict-file affix[+addition] ...'

# Defaults: list the matching roots (no statistics) and treat affixes as
# suffixes.  $pre and $suf are the regular-expression anchors placed
# before and after the affix; only one of them is non-empty at a time.
counts=no
pre=
suf='$'

# Consume leading option flags, stopping at the first argument that does
# not start with "-" (or when the arguments run out, since an empty "$1"
# falls through to the "*" arm).
while :
do
    case "$1" in
	-p)
	    # Prefix mode: anchor the affix at the start of the word.
	    pre='^'
	    suf=
	    ;;
	-s)
	    # Suffix mode (the default): anchor the affix at the end.
	    pre=
	    suf='$'
	    ;;
	-c)
	    # Report per-affix statistics instead of listing the roots.
	    counts=yes
	    ;;
	-*)
	    # Unknown flag.
	    echo "$USAGE" 1>&2
	    exit 1
	    ;;
	*)
	    break
	    ;;
    esac
    shift
done
---|
# The first remaining argument is the dictionary file; everything after
# it is an affix specification.
#
# Check that an argument exists before shifting: shifting with no
# positional parameters is an error that may abort a non-interactive
# shell before the usage message is printed, and where it does not abort
# the user would get a confusing "Can't read" with an empty file name.
if [ $# -eq 0 ]
then
    echo "$USAGE" 1>&2
    exit 1
fi
dict="$1"
shift
if [ ! -r "$dict" ]
then
    echo "Can't read $dict" 1>&2
    echo "$USAGE" 1>&2
    exit 1
elif [ $# -eq 0 ]
then
    # A dictionary was given but no affixes to try against it.
    echo "$USAGE" 1>&2
    exit 1
fi
---|
# trial_roots SEDSCRIPT DICT
#
# Print every trial root that is itself a word in DICT.  SEDSCRIPT is a
# sed "s/.../.../p" command that rewrites an affixed word into its trial
# root and prints only the lines it changed.
#
# The roots must be sorted before they reach comm(1), which only works on
# sorted input: stripping a suffix does not preserve order ("xaing" sorts
# before "xing", yet their roots "xa" and "x" come out the other way
# round), and unsorted input makes comm silently miss matches.  The sort
# runs in the current locale, so DICT must have been sorted the same way.
# $SORTTMP is expanded unquoted on purpose so that it splits into the
# option and its argument.
trial_roots ()
{
    sed -n -e "$1" "$2" | sort $SORTTMP | comm -12 - "$2"
}

# Try each remaining argument, either "affix" or "affix+addition", in turn.
# The affix is interpolated into the sed pattern as-is, so regular
# expression metacharacters in it are active.
while [ $# -ne 0 ]
do
    case "$1" in
	*+*)
	    # Split at the last "+": the affix is stripped from the word
	    # and the addition is put back in its place.
	    affix=`expr "$1" : '\(.*\)+'`
	    addition=`expr "$1" : '.*+\(.*\)'`
	    sedscript="s/$pre$affix$suf/$addition/p"
	    ;;
	*)
	    sedscript="s/$pre$1$suf//p"
	    ;;
    esac
    if [ "$counts" = no ]
    then
	echo ===== "$1" =====
	trial_roots "$sedscript" "$dict"
    else
	# wc -lc gives the number of roots found and their total size in
	# bytes; the unquoted substitution collapses wc's padding so the
	# result is a single "affix lines bytes" line.
	echo "$1" `trial_roots "$sedscript" "$dict" | wc -lc`
    fi
    shift
done
---|