1 | /*- |
---|
2 | * Copyright (c) 1992, 1993, 1994 |
---|
3 | * The Regents of the University of California. All rights reserved. |
---|
4 | * Copyright (c) 1992, 1993, 1994, 1995, 1996 |
---|
5 | * Keith Bostic. All rights reserved. |
---|
6 | * |
---|
7 | * See the LICENSE file for redistribution information. |
---|
8 | */ |
---|
9 | |
---|
10 | #include "config.h" |
---|
11 | |
---|
12 | #ifndef lint |
---|
13 | static const char sccsid[] = "@(#)v_word.c 10.5 (Berkeley) 3/6/96"; |
---|
14 | #endif /* not lint */ |
---|
15 | |
---|
16 | #include <sys/types.h> |
---|
17 | #include <sys/queue.h> |
---|
18 | #include <sys/time.h> |
---|
19 | |
---|
20 | #include <bitstring.h> |
---|
21 | #include <ctype.h> |
---|
22 | #include <limits.h> |
---|
23 | #include <stdio.h> |
---|
24 | |
---|
25 | #include "../common/common.h" |
---|
26 | #include "vi.h" |
---|
27 | |
---|
28 | /* |
---|
29 | * There are two types of "words". Bigwords are easy -- groups of anything |
---|
30 | * delimited by whitespace. Normal words are trickier. They are either a |
---|
31 | * group of characters, numbers and underscores, or a group of anything but, |
---|
32 | * delimited by whitespace. When searching for a word, if you're in whitespace, it's |
---|
33 | * easy, just remove the whitespace and go to the beginning or end of the |
---|
34 | * word. Otherwise, figure out if the next character is in a different group. |
---|
35 | * If it is, go to the beginning or end of that group, otherwise, go to the |
---|
36 | * beginning or end of the current group. The historic version of vi didn't |
---|
37 | * get this right, so, for example, there were cases where "4e" was not the |
---|
38 | * same as "eeee" -- in particular, single character words, and commands that |
---|
39 | * began in whitespace were almost always handled incorrectly. To get it right |
---|
40 | * you have to resolve the cursor after each search so that the look-ahead to |
---|
41 | * figure out what type of "word" the cursor is in will be correct. |
---|
42 | * |
---|
43 | * Empty lines, and lines that consist of only white-space characters count |
---|
44 | * as a single word, and the beginning and end of the file counts as an |
---|
45 | * infinite number of words. |
---|
46 | * |
---|
47 | * Movements associated with commands are different than movement commands. |
---|
48 | * For example, in "abc def", with the cursor on the 'a', "cw" is from |
---|
49 | * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white |
---|
50 | * space is discarded from the change movement. Another example is that, |
---|
51 | * in the same string, a "cw" on any white space character replaces that |
---|
52 | * single character, and nothing else. Ain't nothin' in here that's easy. |
---|
53 | * |
---|
54 | * One historic note -- in the original vi, the 'w', 'W' and 'B' commands |
---|
55 | * would treat groups of empty lines as individual words, i.e. the command |
---|
56 | * would move the cursor to each new empty line. The 'e' and 'E' commands |
---|
57 | * would treat groups of empty lines as a single word, i.e. the first use |
---|
58 | * would move past the group of lines. The 'b' command would just beep at |
---|
59 | * you, or, if you did it from the start of the line as part of a motion |
---|
60 | * command, go absolutely nuts. If the lines contained only white-space |
---|
61 | * characters, the 'w' and 'W' commands would just beep at you, and the 'B', |
---|
62 | * 'b', 'E' and 'e' commands would treat the group as a single word, and |
---|
63 | * the 'B' and 'b' commands will treat the lines as individual words. This |
---|
64 | * implementation treats all of these cases as a single white-space word. |
---|
65 | */ |
---|
66 | |
---|
67 | enum which {BIGWORD, LITTLEWORD}; |
---|
68 | |
---|
69 | static int bword __P((SCR *, VICMD *, enum which)); |
---|
70 | static int eword __P((SCR *, VICMD *, enum which)); |
---|
71 | static int fword __P((SCR *, VICMD *, enum which)); |
---|
72 | |
---|
73 | /* |
---|
74 | * v_wordW -- [count]W |
---|
75 | * Move forward a bigword at a time. |
---|
76 | * |
---|
77 | * PUBLIC: int v_wordW __P((SCR *, VICMD *)); |
---|
78 | */ |
---|
79 | int |
---|
80 | v_wordW(sp, vp) |
---|
81 | SCR *sp; |
---|
82 | VICMD *vp; |
---|
83 | { |
---|
84 | return (fword(sp, vp, BIGWORD)); |
---|
85 | } |
---|
86 | |
---|
87 | /* |
---|
88 | * v_wordw -- [count]w |
---|
89 | * Move forward a word at a time. |
---|
90 | * |
---|
91 | * PUBLIC: int v_wordw __P((SCR *, VICMD *)); |
---|
92 | */ |
---|
93 | int |
---|
94 | v_wordw(sp, vp) |
---|
95 | SCR *sp; |
---|
96 | VICMD *vp; |
---|
97 | { |
---|
98 | return (fword(sp, vp, LITTLEWORD)); |
---|
99 | } |
---|
100 | |
---|
101 | /* |
---|
102 | * fword -- |
---|
103 | * Move forward by words. |
---|
104 | */ |
---|
105 | static int |
---|
106 | fword(sp, vp, type) |
---|
107 | SCR *sp; |
---|
108 | VICMD *vp; |
---|
109 | enum which type; |
---|
110 | { |
---|
111 | enum { INWORD, NOTWORD } state; |
---|
112 | VCS cs; |
---|
113 | u_long cnt; |
---|
114 | |
---|
115 | cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; |
---|
116 | cs.cs_lno = vp->m_start.lno; |
---|
117 | cs.cs_cno = vp->m_start.cno; |
---|
118 | if (cs_init(sp, &cs)) |
---|
119 | return (1); |
---|
120 | |
---|
121 | /* |
---|
122 | * If in white-space: |
---|
123 | * If the count is 1, and it's a change command, we're done. |
---|
124 | * Else, move to the first non-white-space character, which |
---|
125 | * counts as a single word move. If it's a motion command, |
---|
126 | * don't move off the end of the line. |
---|
127 | */ |
---|
128 | if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) { |
---|
129 | if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { |
---|
130 | if (ISCMD(vp->rkp, 'c')) |
---|
131 | return (0); |
---|
132 | if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { |
---|
133 | if (cs_fspace(sp, &cs)) |
---|
134 | return (1); |
---|
135 | goto ret; |
---|
136 | } |
---|
137 | } |
---|
138 | if (cs_fblank(sp, &cs)) |
---|
139 | return (1); |
---|
140 | --cnt; |
---|
141 | } |
---|
142 | |
---|
143 | /* |
---|
144 | * Cyclically move to the next word -- this involves skipping |
---|
145 | * over word characters and then any trailing non-word characters. |
---|
146 | * Note, for the 'w' command, the definition of a word keeps |
---|
147 | * switching. |
---|
148 | */ |
---|
149 | if (type == BIGWORD) |
---|
150 | while (cnt--) { |
---|
151 | for (;;) { |
---|
152 | if (cs_next(sp, &cs)) |
---|
153 | return (1); |
---|
154 | if (cs.cs_flags == CS_EOF) |
---|
155 | goto ret; |
---|
156 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
157 | break; |
---|
158 | } |
---|
159 | /* |
---|
160 | * If a motion command and we're at the end of the |
---|
161 | * last word, we're done. Delete and yank eat any |
---|
162 | * trailing blanks, but we don't move off the end |
---|
163 | * of the line regardless. |
---|
164 | */ |
---|
165 | if (cnt == 0 && ISMOTION(vp)) { |
---|
166 | if ((ISCMD(vp->rkp, 'd') || |
---|
167 | ISCMD(vp->rkp, 'y')) && |
---|
168 | cs_fspace(sp, &cs)) |
---|
169 | return (1); |
---|
170 | break; |
---|
171 | } |
---|
172 | |
---|
173 | /* Eat whitespace characters. */ |
---|
174 | if (cs_fblank(sp, &cs)) |
---|
175 | return (1); |
---|
176 | if (cs.cs_flags == CS_EOF) |
---|
177 | goto ret; |
---|
178 | } |
---|
179 | else |
---|
180 | while (cnt--) { |
---|
181 | state = cs.cs_flags == 0 && |
---|
182 | inword(cs.cs_ch) ? INWORD : NOTWORD; |
---|
183 | for (;;) { |
---|
184 | if (cs_next(sp, &cs)) |
---|
185 | return (1); |
---|
186 | if (cs.cs_flags == CS_EOF) |
---|
187 | goto ret; |
---|
188 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
189 | break; |
---|
190 | if (state == INWORD) { |
---|
191 | if (!inword(cs.cs_ch)) |
---|
192 | break; |
---|
193 | } else |
---|
194 | if (inword(cs.cs_ch)) |
---|
195 | break; |
---|
196 | } |
---|
197 | /* See comment above. */ |
---|
198 | if (cnt == 0 && ISMOTION(vp)) { |
---|
199 | if ((ISCMD(vp->rkp, 'd') || |
---|
200 | ISCMD(vp->rkp, 'y')) && |
---|
201 | cs_fspace(sp, &cs)) |
---|
202 | return (1); |
---|
203 | break; |
---|
204 | } |
---|
205 | |
---|
206 | /* Eat whitespace characters. */ |
---|
207 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
208 | if (cs_fblank(sp, &cs)) |
---|
209 | return (1); |
---|
210 | if (cs.cs_flags == CS_EOF) |
---|
211 | goto ret; |
---|
212 | } |
---|
213 | |
---|
214 | /* |
---|
215 | * If we didn't move, we must be at EOF. |
---|
216 | * |
---|
217 | * !!! |
---|
218 | * That's okay for motion commands, however. |
---|
219 | */ |
---|
220 | ret: if (!ISMOTION(vp) && |
---|
221 | cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { |
---|
222 | v_eof(sp, &vp->m_start); |
---|
223 | return (1); |
---|
224 | } |
---|
225 | |
---|
226 | /* Adjust the end of the range for motion commands. */ |
---|
227 | vp->m_stop.lno = cs.cs_lno; |
---|
228 | vp->m_stop.cno = cs.cs_cno; |
---|
229 | if (ISMOTION(vp) && cs.cs_flags == 0) |
---|
230 | --vp->m_stop.cno; |
---|
231 | |
---|
232 | /* |
---|
233 | * Non-motion commands move to the end of the range. Delete |
---|
234 | * and yank stay at the start, ignore others. |
---|
235 | */ |
---|
236 | vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; |
---|
237 | return (0); |
---|
238 | } |
---|
239 | |
---|
240 | /* |
---|
241 | * v_wordE -- [count]E |
---|
242 | * Move forward to the end of the bigword. |
---|
243 | * |
---|
244 | * PUBLIC: int v_wordE __P((SCR *, VICMD *)); |
---|
245 | */ |
---|
246 | int |
---|
247 | v_wordE(sp, vp) |
---|
248 | SCR *sp; |
---|
249 | VICMD *vp; |
---|
250 | { |
---|
251 | return (eword(sp, vp, BIGWORD)); |
---|
252 | } |
---|
253 | |
---|
254 | /* |
---|
255 | * v_worde -- [count]e |
---|
256 | * Move forward to the end of the word. |
---|
257 | * |
---|
258 | * PUBLIC: int v_worde __P((SCR *, VICMD *)); |
---|
259 | */ |
---|
260 | int |
---|
261 | v_worde(sp, vp) |
---|
262 | SCR *sp; |
---|
263 | VICMD *vp; |
---|
264 | { |
---|
265 | return (eword(sp, vp, LITTLEWORD)); |
---|
266 | } |
---|
267 | |
---|
268 | /* |
---|
269 | * eword -- |
---|
270 | * Move forward to the end of the word. |
---|
271 | */ |
---|
272 | static int |
---|
273 | eword(sp, vp, type) |
---|
274 | SCR *sp; |
---|
275 | VICMD *vp; |
---|
276 | enum which type; |
---|
277 | { |
---|
278 | enum { INWORD, NOTWORD } state; |
---|
279 | VCS cs; |
---|
280 | u_long cnt; |
---|
281 | |
---|
282 | cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; |
---|
283 | cs.cs_lno = vp->m_start.lno; |
---|
284 | cs.cs_cno = vp->m_start.cno; |
---|
285 | if (cs_init(sp, &cs)) |
---|
286 | return (1); |
---|
287 | |
---|
288 | /* |
---|
289 | * !!! |
---|
290 | * If in whitespace, or the next character is whitespace, move past |
---|
291 | * it. (This doesn't count as a word move.) Stay at the character |
---|
292 | * past the current one, it sets word "state" for the 'e' command. |
---|
293 | */ |
---|
294 | if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { |
---|
295 | if (cs_next(sp, &cs)) |
---|
296 | return (1); |
---|
297 | if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) |
---|
298 | goto start; |
---|
299 | } |
---|
300 | if (cs_fblank(sp, &cs)) |
---|
301 | return (1); |
---|
302 | |
---|
303 | /* |
---|
304 | * Cyclically move to the next word -- this involves skipping |
---|
305 | * over word characters and then any trailing non-word characters. |
---|
306 | * Note, for the 'e' command, the definition of a word keeps |
---|
307 | * switching. |
---|
308 | */ |
---|
309 | start: if (type == BIGWORD) |
---|
310 | while (cnt--) { |
---|
311 | for (;;) { |
---|
312 | if (cs_next(sp, &cs)) |
---|
313 | return (1); |
---|
314 | if (cs.cs_flags == CS_EOF) |
---|
315 | goto ret; |
---|
316 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
317 | break; |
---|
318 | } |
---|
319 | /* |
---|
320 | * When we reach the start of the word after the last |
---|
321 | * word, we're done. If we changed state, back up one |
---|
322 | * to the end of the previous word. |
---|
323 | */ |
---|
324 | if (cnt == 0) { |
---|
325 | if (cs.cs_flags == 0 && cs_prev(sp, &cs)) |
---|
326 | return (1); |
---|
327 | break; |
---|
328 | } |
---|
329 | |
---|
330 | /* Eat whitespace characters. */ |
---|
331 | if (cs_fblank(sp, &cs)) |
---|
332 | return (1); |
---|
333 | if (cs.cs_flags == CS_EOF) |
---|
334 | goto ret; |
---|
335 | } |
---|
336 | else |
---|
337 | while (cnt--) { |
---|
338 | state = cs.cs_flags == 0 && |
---|
339 | inword(cs.cs_ch) ? INWORD : NOTWORD; |
---|
340 | for (;;) { |
---|
341 | if (cs_next(sp, &cs)) |
---|
342 | return (1); |
---|
343 | if (cs.cs_flags == CS_EOF) |
---|
344 | goto ret; |
---|
345 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
346 | break; |
---|
347 | if (state == INWORD) { |
---|
348 | if (!inword(cs.cs_ch)) |
---|
349 | break; |
---|
350 | } else |
---|
351 | if (inword(cs.cs_ch)) |
---|
352 | break; |
---|
353 | } |
---|
354 | /* See comment above. */ |
---|
355 | if (cnt == 0) { |
---|
356 | if (cs.cs_flags == 0 && cs_prev(sp, &cs)) |
---|
357 | return (1); |
---|
358 | break; |
---|
359 | } |
---|
360 | |
---|
361 | /* Eat whitespace characters. */ |
---|
362 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
363 | if (cs_fblank(sp, &cs)) |
---|
364 | return (1); |
---|
365 | if (cs.cs_flags == CS_EOF) |
---|
366 | goto ret; |
---|
367 | } |
---|
368 | |
---|
369 | /* |
---|
370 | * If we didn't move, we must be at EOF. |
---|
371 | * |
---|
372 | * !!! |
---|
373 | * That's okay for motion commands, however. |
---|
374 | */ |
---|
375 | ret: if (!ISMOTION(vp) && |
---|
376 | cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { |
---|
377 | v_eof(sp, &vp->m_start); |
---|
378 | return (1); |
---|
379 | } |
---|
380 | |
---|
381 | /* Set the end of the range for motion commands. */ |
---|
382 | vp->m_stop.lno = cs.cs_lno; |
---|
383 | vp->m_stop.cno = cs.cs_cno; |
---|
384 | |
---|
385 | /* |
---|
386 | * Non-motion commands move to the end of the range. |
---|
387 | * Delete and yank stay at the start, ignore others. |
---|
388 | */ |
---|
389 | vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; |
---|
390 | return (0); |
---|
391 | } |
---|
392 | |
---|
393 | /* |
---|
394 | * v_WordB -- [count]B |
---|
395 | * Move backward a bigword at a time. |
---|
396 | * |
---|
397 | * PUBLIC: int v_wordB __P((SCR *, VICMD *)); |
---|
398 | */ |
---|
399 | int |
---|
400 | v_wordB(sp, vp) |
---|
401 | SCR *sp; |
---|
402 | VICMD *vp; |
---|
403 | { |
---|
404 | return (bword(sp, vp, BIGWORD)); |
---|
405 | } |
---|
406 | |
---|
407 | /* |
---|
408 | * v_wordb -- [count]b |
---|
409 | * Move backward a word at a time. |
---|
410 | * |
---|
411 | * PUBLIC: int v_wordb __P((SCR *, VICMD *)); |
---|
412 | */ |
---|
413 | int |
---|
414 | v_wordb(sp, vp) |
---|
415 | SCR *sp; |
---|
416 | VICMD *vp; |
---|
417 | { |
---|
418 | return (bword(sp, vp, LITTLEWORD)); |
---|
419 | } |
---|
420 | |
---|
421 | /* |
---|
422 | * bword -- |
---|
423 | * Move backward by words. |
---|
424 | */ |
---|
425 | static int |
---|
426 | bword(sp, vp, type) |
---|
427 | SCR *sp; |
---|
428 | VICMD *vp; |
---|
429 | enum which type; |
---|
430 | { |
---|
431 | enum { INWORD, NOTWORD } state; |
---|
432 | VCS cs; |
---|
433 | u_long cnt; |
---|
434 | |
---|
435 | cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; |
---|
436 | cs.cs_lno = vp->m_start.lno; |
---|
437 | cs.cs_cno = vp->m_start.cno; |
---|
438 | if (cs_init(sp, &cs)) |
---|
439 | return (1); |
---|
440 | |
---|
441 | /* |
---|
442 | * !!! |
---|
443 | * If in whitespace, or the previous character is whitespace, move |
---|
444 | * past it. (This doesn't count as a word move.) Stay at the |
---|
445 | * character before the current one, it sets word "state" for the |
---|
446 | * 'b' command. |
---|
447 | */ |
---|
448 | if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { |
---|
449 | if (cs_prev(sp, &cs)) |
---|
450 | return (1); |
---|
451 | if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) |
---|
452 | goto start; |
---|
453 | } |
---|
454 | if (cs_bblank(sp, &cs)) |
---|
455 | return (1); |
---|
456 | |
---|
457 | /* |
---|
458 | * Cyclically move to the beginning of the previous word -- this |
---|
459 | * involves skipping over word characters and then any trailing |
---|
460 | * non-word characters. Note, for the 'b' command, the definition |
---|
461 | * of a word keeps switching. |
---|
462 | */ |
---|
463 | start: if (type == BIGWORD) |
---|
464 | while (cnt--) { |
---|
465 | for (;;) { |
---|
466 | if (cs_prev(sp, &cs)) |
---|
467 | return (1); |
---|
468 | if (cs.cs_flags == CS_SOF) |
---|
469 | goto ret; |
---|
470 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
471 | break; |
---|
472 | } |
---|
473 | /* |
---|
474 | * When we reach the end of the word before the last |
---|
475 | * word, we're done. If we changed state, move forward |
---|
476 | * one to the end of the next word. |
---|
477 | */ |
---|
478 | if (cnt == 0) { |
---|
479 | if (cs.cs_flags == 0 && cs_next(sp, &cs)) |
---|
480 | return (1); |
---|
481 | break; |
---|
482 | } |
---|
483 | |
---|
484 | /* Eat whitespace characters. */ |
---|
485 | if (cs_bblank(sp, &cs)) |
---|
486 | return (1); |
---|
487 | if (cs.cs_flags == CS_SOF) |
---|
488 | goto ret; |
---|
489 | } |
---|
490 | else |
---|
491 | while (cnt--) { |
---|
492 | state = cs.cs_flags == 0 && |
---|
493 | inword(cs.cs_ch) ? INWORD : NOTWORD; |
---|
494 | for (;;) { |
---|
495 | if (cs_prev(sp, &cs)) |
---|
496 | return (1); |
---|
497 | if (cs.cs_flags == CS_SOF) |
---|
498 | goto ret; |
---|
499 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
500 | break; |
---|
501 | if (state == INWORD) { |
---|
502 | if (!inword(cs.cs_ch)) |
---|
503 | break; |
---|
504 | } else |
---|
505 | if (inword(cs.cs_ch)) |
---|
506 | break; |
---|
507 | } |
---|
508 | /* See comment above. */ |
---|
509 | if (cnt == 0) { |
---|
510 | if (cs.cs_flags == 0 && cs_next(sp, &cs)) |
---|
511 | return (1); |
---|
512 | break; |
---|
513 | } |
---|
514 | |
---|
515 | /* Eat whitespace characters. */ |
---|
516 | if (cs.cs_flags != 0 || isblank(cs.cs_ch)) |
---|
517 | if (cs_bblank(sp, &cs)) |
---|
518 | return (1); |
---|
519 | if (cs.cs_flags == CS_SOF) |
---|
520 | goto ret; |
---|
521 | } |
---|
522 | |
---|
523 | /* If we didn't move, we must be at SOF. */ |
---|
524 | ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { |
---|
525 | v_sof(sp, &vp->m_start); |
---|
526 | return (1); |
---|
527 | } |
---|
528 | |
---|
529 | /* Set the end of the range for motion commands. */ |
---|
530 | vp->m_stop.lno = cs.cs_lno; |
---|
531 | vp->m_stop.cno = cs.cs_cno; |
---|
532 | |
---|
533 | /* |
---|
534 | * All commands move to the end of the range. Motion commands |
---|
535 | * adjust the starting point to the character before the current |
---|
536 | * one. |
---|
537 | * |
---|
538 | * !!! |
---|
539 | * The historic vi didn't get this right -- the `yb' command yanked |
---|
540 | * the right stuff and even updated the cursor value, but the cursor |
---|
541 | * was not actually updated on the screen. |
---|
542 | */ |
---|
543 | vp->m_final = vp->m_stop; |
---|
544 | if (ISMOTION(vp)) |
---|
545 | --vp->m_start.cno; |
---|
546 | return (0); |
---|
547 | } |
---|