1 | /*- |
---|
2 | * Copyright (c) 1992, 1993, 1994 |
---|
3 | * The Regents of the University of California. All rights reserved. |
---|
4 | * Copyright (c) 1992, 1993, 1994, 1995, 1996 |
---|
5 | * Keith Bostic. All rights reserved. |
---|
6 | * |
---|
7 | * See the LICENSE file for redistribution information. |
---|
8 | */ |
---|
9 | |
---|
10 | #include "config.h" |
---|
11 | |
---|
12 | #ifndef lint |
---|
13 | static const char sccsid[] = "@(#)v_sentence.c 10.7 (Berkeley) 3/6/96"; |
---|
14 | #endif /* not lint */ |
---|
15 | |
---|
16 | #include <sys/types.h> |
---|
17 | #include <sys/queue.h> |
---|
18 | #include <sys/time.h> |
---|
19 | |
---|
20 | #include <bitstring.h> |
---|
21 | #include <ctype.h> |
---|
22 | #include <limits.h> |
---|
23 | #include <stdio.h> |
---|
24 | |
---|
25 | #include "../common/common.h" |
---|
26 | #include "vi.h" |
---|
27 | |
---|
28 | /* |
---|
29 | * !!! |
---|
30 | * In historic vi, a sentence was delimited by a '.', '?' or '!' character |
---|
31 | * followed by TWO spaces or a newline. One or more empty lines was also |
---|
32 | * treated as a separate sentence. The Berkeley documentation for historical |
---|
33 | * vi states that any number of ')', ']', '"' and '\'' characters can be |
---|
34 | * between the delimiter character and the spaces or end of line, however, |
---|
35 | * the historical implementation did not handle additional '"' characters. |
---|
36 | * We follow the documentation here, not the implementation. |
---|
37 | * |
---|
38 | * Once again, historical vi didn't do sentence movements associated with |
---|
39 | * counts consistently, mostly in the presence of lines containing only |
---|
40 | * white-space characters. |
---|
41 | * |
---|
42 | * This implementation also permits a single tab to delimit sentences, and |
---|
43 | * treats lines containing only white-space characters as empty lines. |
---|
44 | * Finally, tabs are eaten (along with spaces) when skipping to the start |
---|
45 | * of the text following a "sentence". |
---|
46 | */ |
---|
47 | |
---|
48 | /* |
---|
49 | * v_sentencef -- [count]) |
---|
50 | * Move forward count sentences. |
---|
51 | * |
---|
52 | * PUBLIC: int v_sentencef __P((SCR *, VICMD *)); |
---|
53 | */ |
---|
54 | int |
---|
55 | v_sentencef(sp, vp) |
---|
56 | SCR *sp; |
---|
57 | VICMD *vp; |
---|
58 | { |
---|
59 | enum { BLANK, NONE, PERIOD } state; |
---|
60 | VCS cs; |
---|
61 | size_t len; |
---|
62 | u_long cnt; |
---|
63 | |
---|
64 | cs.cs_lno = vp->m_start.lno; |
---|
65 | cs.cs_cno = vp->m_start.cno; |
---|
66 | if (cs_init(sp, &cs)) |
---|
67 | return (1); |
---|
68 | |
---|
69 | cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; |
---|
70 | |
---|
71 | /* |
---|
72 | * !!! |
---|
73 | * If in white-space, the next start of sentence counts as one. |
---|
74 | * This may not handle " . " correctly, but it's real unclear |
---|
75 | * what correctly means in that case. |
---|
76 | */ |
---|
77 | if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) { |
---|
78 | if (cs_fblank(sp, &cs)) |
---|
79 | return (1); |
---|
80 | if (--cnt == 0) { |
---|
81 | if (vp->m_start.lno != cs.cs_lno || |
---|
82 | vp->m_start.cno != cs.cs_cno) |
---|
83 | goto okret; |
---|
84 | return (1); |
---|
85 | } |
---|
86 | } |
---|
87 | |
---|
88 | for (state = NONE;;) { |
---|
89 | if (cs_next(sp, &cs)) |
---|
90 | return (1); |
---|
91 | if (cs.cs_flags == CS_EOF) |
---|
92 | break; |
---|
93 | if (cs.cs_flags == CS_EOL) { |
---|
94 | if ((state == PERIOD || state == BLANK) && --cnt == 0) { |
---|
95 | if (cs_next(sp, &cs)) |
---|
96 | return (1); |
---|
97 | if (cs.cs_flags == 0 && |
---|
98 | isblank(cs.cs_ch) && cs_fblank(sp, &cs)) |
---|
99 | return (1); |
---|
100 | goto okret; |
---|
101 | } |
---|
102 | state = NONE; |
---|
103 | continue; |
---|
104 | } |
---|
105 | if (cs.cs_flags == CS_EMP) { /* An EMP is two sentences. */ |
---|
106 | if (--cnt == 0) |
---|
107 | goto okret; |
---|
108 | if (cs_fblank(sp, &cs)) |
---|
109 | return (1); |
---|
110 | if (--cnt == 0) |
---|
111 | goto okret; |
---|
112 | state = NONE; |
---|
113 | continue; |
---|
114 | } |
---|
115 | switch (cs.cs_ch) { |
---|
116 | case '.': |
---|
117 | case '?': |
---|
118 | case '!': |
---|
119 | state = PERIOD; |
---|
120 | break; |
---|
121 | case ')': |
---|
122 | case ']': |
---|
123 | case '"': |
---|
124 | case '\'': |
---|
125 | if (state != PERIOD) |
---|
126 | state = NONE; |
---|
127 | break; |
---|
128 | case '\t': |
---|
129 | if (state == PERIOD) |
---|
130 | state = BLANK; |
---|
131 | /* FALLTHROUGH */ |
---|
132 | case ' ': |
---|
133 | if (state == PERIOD) { |
---|
134 | state = BLANK; |
---|
135 | break; |
---|
136 | } |
---|
137 | if (state == BLANK && --cnt == 0) { |
---|
138 | if (cs_fblank(sp, &cs)) |
---|
139 | return (1); |
---|
140 | goto okret; |
---|
141 | } |
---|
142 | /* FALLTHROUGH */ |
---|
143 | default: |
---|
144 | state = NONE; |
---|
145 | break; |
---|
146 | } |
---|
147 | } |
---|
148 | |
---|
149 | /* EOF is a movement sink, but it's an error not to have moved. */ |
---|
150 | if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) { |
---|
151 | v_eof(sp, NULL); |
---|
152 | return (1); |
---|
153 | } |
---|
154 | |
---|
155 | okret: vp->m_stop.lno = cs.cs_lno; |
---|
156 | vp->m_stop.cno = cs.cs_cno; |
---|
157 | |
---|
158 | /* |
---|
159 | * !!! |
---|
160 | * Historic, uh, features, yeah, that's right, call 'em features. |
---|
161 | * If the starting and ending cursor positions are at the first |
---|
162 | * column in their lines, i.e. the movement is cutting entire lines, |
---|
163 | * the buffer is in line mode, and the ending position is the last |
---|
164 | * character of the previous line. Note check to make sure that |
---|
165 | * it's not within a single line. |
---|
166 | * |
---|
167 | * Non-motion commands move to the end of the range. Delete and |
---|
168 | * yank stay at the start. Ignore others. Adjust the end of the |
---|
169 | * range for motion commands. |
---|
170 | */ |
---|
171 | if (ISMOTION(vp)) { |
---|
172 | if (vp->m_start.cno == 0 && |
---|
173 | (cs.cs_flags != 0 || vp->m_stop.cno == 0)) { |
---|
174 | if (vp->m_start.lno < vp->m_stop.lno) { |
---|
175 | if (db_get(sp, |
---|
176 | --vp->m_stop.lno, DBG_FATAL, NULL, &len)) |
---|
177 | return (1); |
---|
178 | vp->m_stop.cno = len ? len - 1 : 0; |
---|
179 | } |
---|
180 | F_SET(vp, VM_LMODE); |
---|
181 | } else |
---|
182 | --vp->m_stop.cno; |
---|
183 | vp->m_final = vp->m_start; |
---|
184 | } else |
---|
185 | vp->m_final = vp->m_stop; |
---|
186 | return (0); |
---|
187 | } |
---|
188 | |
---|
189 | /* |
---|
190 | * v_sentenceb -- [count]( |
---|
191 | * Move backward count sentences. |
---|
192 | * |
---|
193 | * PUBLIC: int v_sentenceb __P((SCR *, VICMD *)); |
---|
194 | */ |
---|
195 | int |
---|
196 | v_sentenceb(sp, vp) |
---|
197 | SCR *sp; |
---|
198 | VICMD *vp; |
---|
199 | { |
---|
200 | VCS cs; |
---|
201 | recno_t slno; |
---|
202 | size_t len, scno; |
---|
203 | u_long cnt; |
---|
204 | int last; |
---|
205 | |
---|
206 | /* |
---|
207 | * !!! |
---|
208 | * Historic vi permitted the user to hit SOF repeatedly. |
---|
209 | */ |
---|
210 | if (vp->m_start.lno == 1 && vp->m_start.cno == 0) |
---|
211 | return (0); |
---|
212 | |
---|
213 | cs.cs_lno = vp->m_start.lno; |
---|
214 | cs.cs_cno = vp->m_start.cno; |
---|
215 | if (cs_init(sp, &cs)) |
---|
216 | return (1); |
---|
217 | |
---|
218 | cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; |
---|
219 | |
---|
220 | /* |
---|
221 | * !!! |
---|
222 | * In empty lines, skip to the previous non-white-space character. |
---|
223 | * If in text, skip to the prevous white-space character. Believe |
---|
224 | * it or not, in the paragraph: |
---|
225 | * ab cd. |
---|
226 | * AB CD. |
---|
227 | * if the cursor is on the 'A' or 'B', ( moves to the 'a'. If it |
---|
228 | * is on the ' ', 'C' or 'D', it moves to the 'A'. Yes, Virginia, |
---|
229 | * Berkeley was once a major center of drug activity. |
---|
230 | */ |
---|
231 | if (cs.cs_flags == CS_EMP) { |
---|
232 | if (cs_bblank(sp, &cs)) |
---|
233 | return (1); |
---|
234 | for (;;) { |
---|
235 | if (cs_prev(sp, &cs)) |
---|
236 | return (1); |
---|
237 | if (cs.cs_flags != CS_EOL) |
---|
238 | break; |
---|
239 | } |
---|
240 | } else if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) |
---|
241 | for (;;) { |
---|
242 | if (cs_prev(sp, &cs)) |
---|
243 | return (1); |
---|
244 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
245 | break; |
---|
246 | } |
---|
247 | |
---|
248 | for (last = 0;;) { |
---|
249 | if (cs_prev(sp, &cs)) |
---|
250 | return (1); |
---|
251 | if (cs.cs_flags == CS_SOF) /* SOF is a movement sink. */ |
---|
252 | break; |
---|
253 | if (cs.cs_flags == CS_EOL) { |
---|
254 | last = 1; |
---|
255 | continue; |
---|
256 | } |
---|
257 | if (cs.cs_flags == CS_EMP) { |
---|
258 | if (--cnt == 0) |
---|
259 | goto ret; |
---|
260 | if (cs_bblank(sp, &cs)) |
---|
261 | return (1); |
---|
262 | last = 0; |
---|
263 | continue; |
---|
264 | } |
---|
265 | switch (cs.cs_ch) { |
---|
266 | case '.': |
---|
267 | case '?': |
---|
268 | case '!': |
---|
269 | if (!last || --cnt != 0) { |
---|
270 | last = 0; |
---|
271 | continue; |
---|
272 | } |
---|
273 | |
---|
274 | ret: slno = cs.cs_lno; |
---|
275 | scno = cs.cs_cno; |
---|
276 | |
---|
277 | /* |
---|
278 | * Move to the start of the sentence, skipping blanks |
---|
279 | * and special characters. |
---|
280 | */ |
---|
281 | do { |
---|
282 | if (cs_next(sp, &cs)) |
---|
283 | return (1); |
---|
284 | } while (!cs.cs_flags && |
---|
285 | (cs.cs_ch == ')' || cs.cs_ch == ']' || |
---|
286 | cs.cs_ch == '"' || cs.cs_ch == '\'')); |
---|
287 | if ((cs.cs_flags || isblank(cs.cs_ch)) && |
---|
288 | cs_fblank(sp, &cs)) |
---|
289 | return (1); |
---|
290 | |
---|
291 | /* |
---|
292 | * If it was ". xyz", with the cursor on the 'x', or |
---|
293 | * "end. ", with the cursor in the spaces, or the |
---|
294 | * beginning of a sentence preceded by an empty line, |
---|
295 | * we can end up where we started. Fix it. |
---|
296 | */ |
---|
297 | if (vp->m_start.lno != cs.cs_lno || |
---|
298 | vp->m_start.cno != cs.cs_cno) |
---|
299 | goto okret; |
---|
300 | |
---|
301 | /* |
---|
302 | * Well, if an empty line preceded possible blanks |
---|
303 | * and the sentence, it could be a real sentence. |
---|
304 | */ |
---|
305 | for (;;) { |
---|
306 | if (cs_prev(sp, &cs)) |
---|
307 | return (1); |
---|
308 | if (cs.cs_flags == CS_EOL) |
---|
309 | continue; |
---|
310 | if (cs.cs_flags == 0 && isblank(cs.cs_ch)) |
---|
311 | continue; |
---|
312 | break; |
---|
313 | } |
---|
314 | if (cs.cs_flags == CS_EMP) |
---|
315 | goto okret; |
---|
316 | |
---|
317 | /* But it wasn't; try again. */ |
---|
318 | ++cnt; |
---|
319 | cs.cs_lno = slno; |
---|
320 | cs.cs_cno = scno; |
---|
321 | last = 0; |
---|
322 | break; |
---|
323 | case '\t': |
---|
324 | last = 1; |
---|
325 | break; |
---|
326 | default: |
---|
327 | last = |
---|
328 | cs.cs_flags == CS_EOL || isblank(cs.cs_ch) || |
---|
329 | cs.cs_ch == ')' || cs.cs_ch == ']' || |
---|
330 | cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0; |
---|
331 | } |
---|
332 | } |
---|
333 | |
---|
334 | okret: vp->m_stop.lno = cs.cs_lno; |
---|
335 | vp->m_stop.cno = cs.cs_cno; |
---|
336 | |
---|
337 | /* |
---|
338 | * !!! |
---|
339 | * If the starting and stopping cursor positions are at the first |
---|
340 | * columns in the line, i.e. the movement is cutting an entire line, |
---|
341 | * the buffer is in line mode, and the starting position is the last |
---|
342 | * character of the previous line. |
---|
343 | * |
---|
344 | * All commands move to the end of the range. Adjust the start of |
---|
345 | * the range for motion commands. |
---|
346 | */ |
---|
347 | if (ISMOTION(vp)) |
---|
348 | if (vp->m_start.cno == 0 && |
---|
349 | (cs.cs_flags != 0 || vp->m_stop.cno == 0)) { |
---|
350 | if (db_get(sp, |
---|
351 | --vp->m_start.lno, DBG_FATAL, NULL, &len)) |
---|
352 | return (1); |
---|
353 | vp->m_start.cno = len ? len - 1 : 0; |
---|
354 | F_SET(vp, VM_LMODE); |
---|
355 | } else |
---|
356 | --vp->m_start.cno; |
---|
357 | vp->m_final = vp->m_stop; |
---|
358 | return (0); |
---|
359 | } |
---|