1 | /*- |
---|
2 | * See the file LICENSE for redistribution information. |
---|
3 | * |
---|
4 | * Copyright (c) 1999-2002 |
---|
5 | * Sleepycat Software. All rights reserved. |
---|
6 | * |
---|
7 | * Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp |
---|
8 | */ |
---|
9 | |
---|
10 | #include "db_config.h" |
---|
11 | |
---|
12 | #ifndef lint |
---|
13 | static const char revid[] = "Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp "; |
---|
14 | #endif /* not lint */ |
---|
15 | |
---|
16 | #ifndef NO_SYSTEM_INCLUDES |
---|
17 | #include <sys/types.h> |
---|
18 | |
---|
19 | #include <string.h> |
---|
20 | #endif |
---|
21 | |
---|
22 | #include "db_int.h" |
---|
23 | #include "dbinc/db_page.h" |
---|
24 | #include "dbinc/db_verify.h" |
---|
25 | #include "dbinc/btree.h" |
---|
26 | |
---|
27 | static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *)); |
---|
28 | static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
29 | db_indx_t *, u_int32_t)); |
---|
30 | static int __bam_vrfy_treeorder __P((DB *, db_pgno_t, PAGE *, BINTERNAL *, |
---|
31 | BINTERNAL *, int (*)(DB *, const DBT *, const DBT *), u_int32_t)); |
---|
32 | static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
33 | db_indx_t *, u_int32_t)); |
---|
34 | |
---|
35 | #define OKFLAGS (DB_AGGRESSIVE | DB_NOORDERCHK | DB_SALVAGE) |
---|
36 | |
---|
37 | /* |
---|
38 | * __bam_vrfy_meta -- |
---|
39 | * Verify the btree-specific part of a metadata page. |
---|
40 | * |
---|
41 | * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, |
---|
42 | * PUBLIC: db_pgno_t, u_int32_t)); |
---|
43 | */ |
---|
44 | int |
---|
45 | __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) |
---|
46 | DB *dbp; |
---|
47 | VRFY_DBINFO *vdp; |
---|
48 | BTMETA *meta; |
---|
49 | db_pgno_t pgno; |
---|
50 | u_int32_t flags; |
---|
51 | { |
---|
52 | VRFY_PAGEINFO *pip; |
---|
53 | int isbad, t_ret, ret; |
---|
54 | db_indx_t ovflsize; |
---|
55 | |
---|
56 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
57 | return (ret); |
---|
58 | |
---|
59 | isbad = 0; |
---|
60 | |
---|
61 | /* |
---|
62 | * If VRFY_INCOMPLETE is not set, then we didn't come through |
---|
63 | * __db_vrfy_pagezero and didn't incompletely |
---|
64 | * check this page--we haven't checked it at all. |
---|
65 | * Thus we need to call __db_vrfy_meta and check the common fields. |
---|
66 | * |
---|
67 | * If VRFY_INCOMPLETE is set, we've already done all the same work |
---|
68 | * in __db_vrfy_pagezero, so skip the check. |
---|
69 | */ |
---|
70 | if (!F_ISSET(pip, VRFY_INCOMPLETE) && |
---|
71 | (ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) { |
---|
72 | if (ret == DB_VERIFY_BAD) |
---|
73 | isbad = 1; |
---|
74 | else |
---|
75 | goto err; |
---|
76 | } |
---|
77 | |
---|
78 | /* bt_minkey: must be >= 2; must produce sensible ovflsize */ |
---|
79 | |
---|
80 | /* avoid division by zero */ |
---|
81 | ovflsize = meta->minkey > 0 ? |
---|
82 | B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0; |
---|
83 | |
---|
84 | if (meta->minkey < 2 || |
---|
85 | ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) { |
---|
86 | pip->bt_minkey = 0; |
---|
87 | isbad = 1; |
---|
88 | EPRINT((dbp->dbenv, |
---|
89 | "Page %lu: nonsensical bt_minkey value %lu on metadata page", |
---|
90 | (u_long)pgno, (u_long)meta->minkey)); |
---|
91 | } else |
---|
92 | pip->bt_minkey = meta->minkey; |
---|
93 | |
---|
94 | /* bt_maxkey: no constraints (XXX: right?) */ |
---|
95 | pip->bt_maxkey = meta->maxkey; |
---|
96 | |
---|
97 | /* re_len: no constraints on this (may be zero or huge--we make rope) */ |
---|
98 | pip->re_len = meta->re_len; |
---|
99 | |
---|
100 | /* |
---|
101 | * The root must not be current page or 0 and it must be within |
---|
102 | * database. If this metadata page is the master meta data page |
---|
103 | * of the file, then the root page had better be page 1. |
---|
104 | */ |
---|
105 | pip->root = 0; |
---|
106 | if (meta->root == PGNO_INVALID || |
---|
107 | meta->root == pgno || !IS_VALID_PGNO(meta->root) || |
---|
108 | (pgno == PGNO_BASE_MD && meta->root != 1)) { |
---|
109 | isbad = 1; |
---|
110 | EPRINT((dbp->dbenv, |
---|
111 | "Page %lu: nonsensical root page %lu on metadata page", |
---|
112 | (u_long)pgno, (u_long)meta->root)); |
---|
113 | } else |
---|
114 | pip->root = meta->root; |
---|
115 | |
---|
116 | /* Flags. */ |
---|
117 | if (F_ISSET(&meta->dbmeta, BTM_RENUMBER)) |
---|
118 | F_SET(pip, VRFY_IS_RRECNO); |
---|
119 | |
---|
120 | if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) { |
---|
121 | /* |
---|
122 | * If this is a master db meta page, it had better not have |
---|
123 | * duplicates. |
---|
124 | */ |
---|
125 | if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) { |
---|
126 | isbad = 1; |
---|
127 | EPRINT((dbp->dbenv, |
---|
128 | "Page %lu: Btree metadata page has both duplicates and multiple databases", |
---|
129 | (u_long)pgno)); |
---|
130 | } |
---|
131 | F_SET(pip, VRFY_HAS_SUBDBS); |
---|
132 | } |
---|
133 | |
---|
134 | if (F_ISSET(&meta->dbmeta, BTM_DUP)) |
---|
135 | F_SET(pip, VRFY_HAS_DUPS); |
---|
136 | if (F_ISSET(&meta->dbmeta, BTM_DUPSORT)) |
---|
137 | F_SET(pip, VRFY_HAS_DUPSORT); |
---|
138 | if (F_ISSET(&meta->dbmeta, BTM_RECNUM)) |
---|
139 | F_SET(pip, VRFY_HAS_RECNUMS); |
---|
140 | if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
141 | EPRINT((dbp->dbenv, |
---|
142 | "Page %lu: Btree metadata page illegally has both recnums and dups", |
---|
143 | (u_long)pgno)); |
---|
144 | isbad = 1; |
---|
145 | } |
---|
146 | |
---|
147 | if (F_ISSET(&meta->dbmeta, BTM_RECNO)) { |
---|
148 | F_SET(pip, VRFY_IS_RECNO); |
---|
149 | dbp->type = DB_RECNO; |
---|
150 | } else if (F_ISSET(pip, VRFY_IS_RRECNO)) { |
---|
151 | isbad = 1; |
---|
152 | EPRINT((dbp->dbenv, |
---|
153 | "Page %lu: metadata page has renumber flag set but is not recno", |
---|
154 | (u_long)pgno)); |
---|
155 | } |
---|
156 | |
---|
157 | if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
158 | EPRINT((dbp->dbenv, |
---|
159 | "Page %lu: recno metadata page specifies duplicates", |
---|
160 | (u_long)pgno)); |
---|
161 | isbad = 1; |
---|
162 | } |
---|
163 | |
---|
164 | if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN)) |
---|
165 | F_SET(pip, VRFY_IS_FIXEDLEN); |
---|
166 | else if (pip->re_len > 0) { |
---|
167 | /* |
---|
168 | * It's wrong to have an re_len if it's not a fixed-length |
---|
169 | * database |
---|
170 | */ |
---|
171 | isbad = 1; |
---|
172 | EPRINT((dbp->dbenv, |
---|
173 | "Page %lu: re_len of %lu in non-fixed-length database", |
---|
174 | (u_long)pgno, (u_long)pip->re_len)); |
---|
175 | } |
---|
176 | |
---|
177 | /* |
---|
178 | * We do not check that the rest of the page is 0, because it may |
---|
179 | * not be and may still be correct. |
---|
180 | */ |
---|
181 | |
---|
182 | err: if ((t_ret = |
---|
183 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) |
---|
184 | ret = t_ret; |
---|
185 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
186 | } |
---|
187 | |
---|
188 | /* |
---|
189 | * __ram_vrfy_leaf -- |
---|
190 | * Verify a recno leaf page. |
---|
191 | * |
---|
192 | * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
193 | * PUBLIC: u_int32_t)); |
---|
194 | */ |
---|
195 | int |
---|
196 | __ram_vrfy_leaf(dbp, vdp, h, pgno, flags) |
---|
197 | DB *dbp; |
---|
198 | VRFY_DBINFO *vdp; |
---|
199 | PAGE *h; |
---|
200 | db_pgno_t pgno; |
---|
201 | u_int32_t flags; |
---|
202 | { |
---|
203 | BKEYDATA *bk; |
---|
204 | VRFY_PAGEINFO *pip; |
---|
205 | db_indx_t i; |
---|
206 | int ret, t_ret, isbad; |
---|
207 | u_int32_t re_len_guess, len; |
---|
208 | |
---|
209 | isbad = 0; |
---|
210 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
211 | return (ret); |
---|
212 | |
---|
213 | if ((ret = __db_fchk(dbp->dbenv, |
---|
214 | "__ram_vrfy_leaf", flags, OKFLAGS)) != 0) |
---|
215 | goto err; |
---|
216 | |
---|
217 | if (TYPE(h) != P_LRECNO) { |
---|
218 | /* We should not have been called. */ |
---|
219 | TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_leaf", pgno, TYPE(h)); |
---|
220 | DB_ASSERT(0); |
---|
221 | ret = EINVAL; |
---|
222 | goto err; |
---|
223 | } |
---|
224 | |
---|
225 | /* |
---|
226 | * Verify (and, if relevant, save off) page fields common to |
---|
227 | * all PAGEs. |
---|
228 | */ |
---|
229 | if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { |
---|
230 | if (ret == DB_VERIFY_BAD) |
---|
231 | isbad = 1; |
---|
232 | else |
---|
233 | goto err; |
---|
234 | } |
---|
235 | |
---|
236 | /* |
---|
237 | * Verify inp[]. Return immediately if it returns DB_VERIFY_BAD; |
---|
238 | * further checks are dangerous. |
---|
239 | */ |
---|
240 | if ((ret = __bam_vrfy_inp(dbp, |
---|
241 | vdp, h, pgno, &pip->entries, flags)) != 0) |
---|
242 | goto err; |
---|
243 | |
---|
244 | if (F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
245 | EPRINT((dbp->dbenv, |
---|
246 | "Page %lu: Recno database has dups", (u_long)pgno)); |
---|
247 | ret = DB_VERIFY_BAD; |
---|
248 | goto err; |
---|
249 | } |
---|
250 | |
---|
251 | /* |
---|
252 | * Walk through inp and see if the lengths of all the records are the |
---|
253 | * same--if so, this may be a fixed-length database, and we want to |
---|
254 | * save off this value. We know inp to be safe if we've gotten this |
---|
255 | * far. |
---|
256 | */ |
---|
257 | re_len_guess = 0; |
---|
258 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
259 | bk = GET_BKEYDATA(dbp, h, i); |
---|
260 | /* KEYEMPTY. Go on. */ |
---|
261 | if (B_DISSET(bk->type)) |
---|
262 | continue; |
---|
263 | if (bk->type == B_OVERFLOW) |
---|
264 | len = ((BOVERFLOW *)bk)->tlen; |
---|
265 | else if (bk->type == B_KEYDATA) |
---|
266 | len = bk->len; |
---|
267 | else { |
---|
268 | isbad = 1; |
---|
269 | EPRINT((dbp->dbenv, |
---|
270 | "Page %lu: nonsensical type for item %lu", |
---|
271 | (u_long)pgno, (u_long)i)); |
---|
272 | continue; |
---|
273 | } |
---|
274 | if (re_len_guess == 0) |
---|
275 | re_len_guess = len; |
---|
276 | |
---|
277 | /* |
---|
278 | * Is this item's len the same as the last one's? If not, |
---|
279 | * reset to 0 and break--we don't have a single re_len. |
---|
280 | * Otherwise, go on to the next item. |
---|
281 | */ |
---|
282 | if (re_len_guess != len) { |
---|
283 | re_len_guess = 0; |
---|
284 | break; |
---|
285 | } |
---|
286 | } |
---|
287 | pip->re_len = re_len_guess; |
---|
288 | |
---|
289 | /* Save off record count. */ |
---|
290 | pip->rec_cnt = NUM_ENT(h); |
---|
291 | |
---|
292 | err: if ((t_ret = |
---|
293 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) |
---|
294 | ret = t_ret; |
---|
295 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
296 | } |
---|
297 | |
---|
298 | /* |
---|
299 | * __bam_vrfy -- |
---|
300 | * Verify a btree leaf or internal page. |
---|
301 | * |
---|
302 | * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
303 | * PUBLIC: u_int32_t)); |
---|
304 | */ |
---|
305 | int |
---|
306 | __bam_vrfy(dbp, vdp, h, pgno, flags) |
---|
307 | DB *dbp; |
---|
308 | VRFY_DBINFO *vdp; |
---|
309 | PAGE *h; |
---|
310 | db_pgno_t pgno; |
---|
311 | u_int32_t flags; |
---|
312 | { |
---|
313 | VRFY_PAGEINFO *pip; |
---|
314 | int ret, t_ret, isbad; |
---|
315 | |
---|
316 | isbad = 0; |
---|
317 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
318 | return (ret); |
---|
319 | |
---|
320 | switch (TYPE(h)) { |
---|
321 | case P_IBTREE: |
---|
322 | case P_IRECNO: |
---|
323 | case P_LBTREE: |
---|
324 | case P_LDUP: |
---|
325 | break; |
---|
326 | default: |
---|
327 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy", pgno, TYPE(h)); |
---|
328 | DB_ASSERT(0); |
---|
329 | ret = EINVAL; |
---|
330 | goto err; |
---|
331 | } |
---|
332 | |
---|
333 | /* |
---|
334 | * Verify (and, if relevant, save off) page fields common to |
---|
335 | * all PAGEs. |
---|
336 | */ |
---|
337 | if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { |
---|
338 | if (ret == DB_VERIFY_BAD) |
---|
339 | isbad = 1; |
---|
340 | else |
---|
341 | goto err; |
---|
342 | } |
---|
343 | |
---|
344 | /* |
---|
345 | * The record count is, on internal pages, stored in an overloaded |
---|
346 | * next_pgno field. Save it off; we'll verify it when we check |
---|
347 | * overall database structure. We could overload the field |
---|
348 | * in VRFY_PAGEINFO, too, but this seems gross, and space |
---|
349 | * is not at such a premium. |
---|
350 | */ |
---|
351 | pip->rec_cnt = RE_NREC(h); |
---|
352 | |
---|
353 | /* |
---|
354 | * Verify inp[]. |
---|
355 | */ |
---|
356 | if (TYPE(h) == P_IRECNO) { |
---|
357 | if ((ret = __ram_vrfy_inp(dbp, |
---|
358 | vdp, h, pgno, &pip->entries, flags)) != 0) |
---|
359 | goto err; |
---|
360 | } else if ((ret = __bam_vrfy_inp(dbp, |
---|
361 | vdp, h, pgno, &pip->entries, flags)) != 0) { |
---|
362 | if (ret == DB_VERIFY_BAD) |
---|
363 | isbad = 1; |
---|
364 | else |
---|
365 | goto err; |
---|
366 | EPRINT((dbp->dbenv, |
---|
367 | "Page %lu: item order check unsafe: skipping", |
---|
368 | (u_long)pgno)); |
---|
369 | } else if (!LF_ISSET(DB_NOORDERCHK) && (ret = |
---|
370 | __bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) { |
---|
371 | /* |
---|
372 | * We know that the elements of inp are reasonable. |
---|
373 | * |
---|
374 | * Check that elements fall in the proper order. |
---|
375 | */ |
---|
376 | if (ret == DB_VERIFY_BAD) |
---|
377 | isbad = 1; |
---|
378 | else |
---|
379 | goto err; |
---|
380 | } |
---|
381 | |
---|
382 | err: if ((t_ret = |
---|
383 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) |
---|
384 | ret = t_ret; |
---|
385 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
386 | } |
---|
387 | |
---|
388 | /* |
---|
389 | * __ram_vrfy_inp -- |
---|
390 | * Verify that all entries in a P_IRECNO inp[] array are reasonable, |
---|
391 | * and count them. Note that P_LRECNO uses __bam_vrfy_inp; |
---|
392 | * P_IRECNOs are a special, and simpler, case, since they have |
---|
393 | * RINTERNALs rather than BKEYDATA/BINTERNALs. |
---|
394 | */ |
---|
395 | static int |
---|
396 | __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) |
---|
397 | DB *dbp; |
---|
398 | VRFY_DBINFO *vdp; |
---|
399 | PAGE *h; |
---|
400 | db_pgno_t pgno; |
---|
401 | db_indx_t *nentriesp; |
---|
402 | u_int32_t flags; |
---|
403 | { |
---|
404 | RINTERNAL *ri; |
---|
405 | VRFY_CHILDINFO child; |
---|
406 | VRFY_PAGEINFO *pip; |
---|
407 | int ret, t_ret, isbad; |
---|
408 | u_int32_t himark, i, offset, nentries; |
---|
409 | db_indx_t *inp; |
---|
410 | u_int8_t *pagelayout, *p; |
---|
411 | |
---|
412 | isbad = 0; |
---|
413 | memset(&child, 0, sizeof(VRFY_CHILDINFO)); |
---|
414 | nentries = 0; |
---|
415 | pagelayout = NULL; |
---|
416 | |
---|
417 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
418 | return (ret); |
---|
419 | |
---|
420 | if (TYPE(h) != P_IRECNO) { |
---|
421 | TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_inp", pgno, TYPE(h)); |
---|
422 | DB_ASSERT(0); |
---|
423 | ret = EINVAL; |
---|
424 | goto err; |
---|
425 | } |
---|
426 | |
---|
427 | himark = dbp->pgsize; |
---|
428 | if ((ret = |
---|
429 | __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0) |
---|
430 | goto err; |
---|
431 | memset(pagelayout, 0, dbp->pgsize); |
---|
432 | inp = P_INP(dbp, h); |
---|
433 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
434 | if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) { |
---|
435 | EPRINT((dbp->dbenv, |
---|
436 | "Page %lu: entries listing %lu overlaps data", |
---|
437 | (u_long)pgno, (u_long)i)); |
---|
438 | ret = DB_VERIFY_BAD; |
---|
439 | goto err; |
---|
440 | } |
---|
441 | offset = inp[i]; |
---|
442 | /* |
---|
443 | * Check that the item offset is reasonable: it points |
---|
444 | * somewhere after the inp array and before the end of the |
---|
445 | * page. |
---|
446 | */ |
---|
447 | if (offset <= (u_int32_t)((u_int8_t *)inp + i - |
---|
448 | (u_int8_t *)h) || |
---|
449 | offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) { |
---|
450 | isbad = 1; |
---|
451 | EPRINT((dbp->dbenv, |
---|
452 | "Page %lu: bad offset %lu at index %lu", |
---|
453 | (u_long)pgno, (u_long)offset, (u_long)i)); |
---|
454 | continue; |
---|
455 | } |
---|
456 | |
---|
457 | /* Update the high-water mark (what HOFFSET should be) */ |
---|
458 | if (offset < himark) |
---|
459 | himark = offset; |
---|
460 | |
---|
461 | nentries++; |
---|
462 | |
---|
463 | /* Make sure this RINTERNAL is not multiply referenced. */ |
---|
464 | ri = GET_RINTERNAL(dbp, h, i); |
---|
465 | if (pagelayout[offset] == 0) { |
---|
466 | pagelayout[offset] = 1; |
---|
467 | child.pgno = ri->pgno; |
---|
468 | child.type = V_RECNO; |
---|
469 | child.nrecs = ri->nrecs; |
---|
470 | if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0) |
---|
471 | goto err; |
---|
472 | } else { |
---|
473 | EPRINT((dbp->dbenv, |
---|
474 | "Page %lu: RINTERNAL structure at offset %lu referenced twice", |
---|
475 | (u_long)pgno, (u_long)offset)); |
---|
476 | isbad = 1; |
---|
477 | } |
---|
478 | } |
---|
479 | |
---|
480 | for (p = pagelayout + himark; |
---|
481 | p < pagelayout + dbp->pgsize; |
---|
482 | p += RINTERNAL_SIZE) |
---|
483 | if (*p != 1) { |
---|
484 | EPRINT((dbp->dbenv, |
---|
485 | "Page %lu: gap between items at offset %lu", |
---|
486 | (u_long)pgno, (u_long)(p - pagelayout))); |
---|
487 | isbad = 1; |
---|
488 | } |
---|
489 | |
---|
490 | if ((db_indx_t)himark != HOFFSET(h)) { |
---|
491 | EPRINT((dbp->dbenv, |
---|
492 | "Page %lu: bad HOFFSET %lu, appears to be %lu", |
---|
493 | (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark)); |
---|
494 | isbad = 1; |
---|
495 | } |
---|
496 | |
---|
497 | *nentriesp = nentries; |
---|
498 | |
---|
499 | err: if ((t_ret = |
---|
500 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) |
---|
501 | ret = t_ret; |
---|
502 | if (pagelayout != NULL) |
---|
503 | __os_free(dbp->dbenv, pagelayout); |
---|
504 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
505 | } |
---|
506 | |
---|
507 | /* |
---|
508 | * __bam_vrfy_inp -- |
---|
509 | * Verify that all entries in inp[] array are reasonable; |
---|
510 | * count them. |
---|
511 | */ |
---|
512 | static int |
---|
513 | __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) |
---|
514 | DB *dbp; |
---|
515 | VRFY_DBINFO *vdp; |
---|
516 | PAGE *h; |
---|
517 | db_pgno_t pgno; |
---|
518 | db_indx_t *nentriesp; |
---|
519 | u_int32_t flags; |
---|
520 | { |
---|
521 | BKEYDATA *bk; |
---|
522 | BOVERFLOW *bo; |
---|
523 | VRFY_CHILDINFO child; |
---|
524 | VRFY_PAGEINFO *pip; |
---|
525 | int isbad, initem, isdupitem, ret, t_ret; |
---|
526 | u_int32_t himark, offset; /* These would be db_indx_ts but for algnmt.*/ |
---|
527 | u_int32_t i, endoff, nentries; |
---|
528 | u_int8_t *pagelayout; |
---|
529 | |
---|
530 | isbad = isdupitem = 0; |
---|
531 | nentries = 0; |
---|
532 | memset(&child, 0, sizeof(VRFY_CHILDINFO)); |
---|
533 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
534 | return (ret); |
---|
535 | |
---|
536 | switch (TYPE(h)) { |
---|
537 | case P_IBTREE: |
---|
538 | case P_LBTREE: |
---|
539 | case P_LDUP: |
---|
540 | case P_LRECNO: |
---|
541 | break; |
---|
542 | default: |
---|
543 | /* |
---|
544 | * In the salvager, we might call this from a page which |
---|
545 | * we merely suspect is a btree page. Otherwise, it |
---|
546 | * shouldn't get called--if it is, that's a verifier bug. |
---|
547 | */ |
---|
548 | if (LF_ISSET(DB_SALVAGE)) |
---|
549 | break; |
---|
550 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_inp", pgno, TYPE(h)); |
---|
551 | DB_ASSERT(0); |
---|
552 | ret = EINVAL; |
---|
553 | goto err; |
---|
554 | } |
---|
555 | |
---|
556 | /* |
---|
557 | * Loop through inp[], the array of items, until we either |
---|
558 | * run out of entries or collide with the data. Keep track |
---|
559 | * of h_offset in himark. |
---|
560 | * |
---|
561 | * For each element in inp[i], make sure it references a region |
---|
562 | * that starts after the end of the inp array (as defined by |
---|
563 | * NUM_ENT(h)), ends before the beginning of the page, doesn't |
---|
564 | * overlap any other regions, and doesn't have a gap between |
---|
565 | * it and the region immediately after it. |
---|
566 | */ |
---|
567 | himark = dbp->pgsize; |
---|
568 | if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0) |
---|
569 | goto err; |
---|
570 | memset(pagelayout, 0, dbp->pgsize); |
---|
571 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
572 | switch (ret = __db_vrfy_inpitem(dbp, |
---|
573 | h, pgno, i, 1, flags, &himark, &offset)) { |
---|
574 | case 0: |
---|
575 | break; |
---|
576 | case DB_VERIFY_BAD: |
---|
577 | isbad = 1; |
---|
578 | continue; |
---|
579 | case DB_VERIFY_FATAL: |
---|
580 | isbad = 1; |
---|
581 | goto err; |
---|
582 | default: |
---|
583 | DB_ASSERT(ret != 0); |
---|
584 | break; |
---|
585 | } |
---|
586 | |
---|
587 | /* |
---|
588 | * We now have a plausible beginning for the item, and we know |
---|
589 | * its length is safe. |
---|
590 | * |
---|
591 | * Mark the beginning and end in pagelayout so we can make sure |
---|
592 | * items have no overlaps or gaps. |
---|
593 | */ |
---|
594 | bk = GET_BKEYDATA(dbp, h, i); |
---|
595 | #define ITEM_BEGIN 1 |
---|
596 | #define ITEM_END 2 |
---|
597 | if (pagelayout[offset] == 0) |
---|
598 | pagelayout[offset] = ITEM_BEGIN; |
---|
599 | else if (pagelayout[offset] == ITEM_BEGIN) { |
---|
600 | /* |
---|
601 | * Having two inp entries that point at the same patch |
---|
602 | * of page is legal if and only if the page is |
---|
603 | * a btree leaf and they're onpage duplicate keys-- |
---|
604 | * that is, if (i % P_INDX) == 0. |
---|
605 | */ |
---|
606 | if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) { |
---|
607 | /* Flag for later. */ |
---|
608 | F_SET(pip, VRFY_HAS_DUPS); |
---|
609 | |
---|
610 | /* Bump up nentries so we don't undercount. */ |
---|
611 | nentries++; |
---|
612 | |
---|
613 | /* |
---|
614 | * We'll check to make sure the end is |
---|
615 | * equal, too. |
---|
616 | */ |
---|
617 | isdupitem = 1; |
---|
618 | } else { |
---|
619 | isbad = 1; |
---|
620 | EPRINT((dbp->dbenv, |
---|
621 | "Page %lu: duplicated item %lu", |
---|
622 | (u_long)pgno, (u_long)i)); |
---|
623 | } |
---|
624 | } |
---|
625 | |
---|
626 | /* |
---|
627 | * Mark the end. Its location varies with the page type |
---|
628 | * and the item type. |
---|
629 | * |
---|
630 | * If the end already has a sign other than 0, do nothing-- |
---|
631 | * it's an overlap that we'll catch later. |
---|
632 | */ |
---|
633 | switch(B_TYPE(bk->type)) { |
---|
634 | case B_KEYDATA: |
---|
635 | if (TYPE(h) == P_IBTREE) |
---|
636 | /* It's a BINTERNAL. */ |
---|
637 | endoff = offset + BINTERNAL_SIZE(bk->len) - 1; |
---|
638 | else |
---|
639 | endoff = offset + BKEYDATA_SIZE(bk->len) - 1; |
---|
640 | break; |
---|
641 | case B_DUPLICATE: |
---|
642 | /* |
---|
643 | * Flag that we have dups; we'll check whether |
---|
644 | * that's okay during the structure check. |
---|
645 | */ |
---|
646 | F_SET(pip, VRFY_HAS_DUPS); |
---|
647 | /* FALLTHROUGH */ |
---|
648 | case B_OVERFLOW: |
---|
649 | /* |
---|
650 | * Overflow entries on internal pages are stored |
---|
651 | * as the _data_ of a BINTERNAL; overflow entries |
---|
652 | * on leaf pages are stored as the entire entry. |
---|
653 | */ |
---|
654 | endoff = offset + |
---|
655 | ((TYPE(h) == P_IBTREE) ? |
---|
656 | BINTERNAL_SIZE(BOVERFLOW_SIZE) : |
---|
657 | BOVERFLOW_SIZE) - 1; |
---|
658 | break; |
---|
659 | default: |
---|
660 | /* |
---|
661 | * We'll complain later; for now, just mark |
---|
662 | * a minimum. |
---|
663 | */ |
---|
664 | endoff = offset + BKEYDATA_SIZE(0) - 1; |
---|
665 | break; |
---|
666 | } |
---|
667 | |
---|
668 | /* |
---|
669 | * If this is an onpage duplicate key we've seen before, |
---|
670 | * the end had better coincide too. |
---|
671 | */ |
---|
672 | if (isdupitem && pagelayout[endoff] != ITEM_END) { |
---|
673 | EPRINT((dbp->dbenv, |
---|
674 | "Page %lu: duplicated item %lu", |
---|
675 | (u_long)pgno, (u_long)i)); |
---|
676 | isbad = 1; |
---|
677 | } else if (pagelayout[endoff] == 0) |
---|
678 | pagelayout[endoff] = ITEM_END; |
---|
679 | isdupitem = 0; |
---|
680 | |
---|
681 | /* |
---|
682 | * There should be no deleted items in a quiescent tree, |
---|
683 | * except in recno. |
---|
684 | */ |
---|
685 | if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) { |
---|
686 | isbad = 1; |
---|
687 | EPRINT((dbp->dbenv, |
---|
688 | "Page %lu: item %lu marked deleted", |
---|
689 | (u_long)pgno, (u_long)i)); |
---|
690 | } |
---|
691 | |
---|
692 | /* |
---|
693 | * Check the type and such of bk--make sure it's reasonable |
---|
694 | * for the pagetype. |
---|
695 | */ |
---|
696 | switch (B_TYPE(bk->type)) { |
---|
697 | case B_KEYDATA: |
---|
698 | /* |
---|
699 | * This is a normal, non-overflow BKEYDATA or BINTERNAL. |
---|
700 | * The only thing to check is the len, and that's |
---|
701 | * already been done. |
---|
702 | */ |
---|
703 | break; |
---|
704 | case B_DUPLICATE: |
---|
705 | if (TYPE(h) == P_IBTREE) { |
---|
706 | isbad = 1; |
---|
707 | EPRINT((dbp->dbenv, |
---|
708 | "Page %lu: duplicate page referenced by internal btree page at item %lu", |
---|
709 | (u_long)pgno, (u_long)i)); |
---|
710 | break; |
---|
711 | } else if (TYPE(h) == P_LRECNO) { |
---|
712 | isbad = 1; |
---|
713 | EPRINT((dbp->dbenv, |
---|
714 | "Page %lu: duplicate page referenced by recno page at item %lu", |
---|
715 | (u_long)pgno, (u_long)i)); |
---|
716 | break; |
---|
717 | } |
---|
718 | /* FALLTHROUGH */ |
---|
719 | case B_OVERFLOW: |
---|
720 | bo = (TYPE(h) == P_IBTREE) ? |
---|
721 | (BOVERFLOW *)(((BINTERNAL *)bk)->data) : |
---|
722 | (BOVERFLOW *)bk; |
---|
723 | |
---|
724 | if (B_TYPE(bk->type) == B_OVERFLOW) |
---|
725 | /* Make sure tlen is reasonable. */ |
---|
726 | if (bo->tlen > dbp->pgsize * vdp->last_pgno) { |
---|
727 | isbad = 1; |
---|
728 | EPRINT((dbp->dbenv, |
---|
729 | "Page %lu: impossible tlen %lu, item %lu", |
---|
730 | (u_long)pgno, |
---|
731 | (u_long)bo->tlen, (u_long)i)); |
---|
732 | /* Don't save as a child. */ |
---|
733 | break; |
---|
734 | } |
---|
735 | |
---|
736 | if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno || |
---|
737 | bo->pgno == PGNO_INVALID) { |
---|
738 | isbad = 1; |
---|
739 | EPRINT((dbp->dbenv, |
---|
740 | "Page %lu: offpage item %lu has bad pgno %lu", |
---|
741 | (u_long)pgno, (u_long)i, (u_long)bo->pgno)); |
---|
742 | /* Don't save as a child. */ |
---|
743 | break; |
---|
744 | } |
---|
745 | |
---|
746 | child.pgno = bo->pgno; |
---|
747 | child.type = (B_TYPE(bk->type) == B_OVERFLOW ? |
---|
748 | V_OVERFLOW : V_DUPLICATE); |
---|
749 | child.tlen = bo->tlen; |
---|
750 | if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0) |
---|
751 | goto err; |
---|
752 | break; |
---|
753 | default: |
---|
754 | isbad = 1; |
---|
755 | EPRINT((dbp->dbenv, |
---|
756 | "Page %lu: item %lu of invalid type %lu", |
---|
757 | (u_long)pgno, (u_long)i)); |
---|
758 | break; |
---|
759 | } |
---|
760 | } |
---|
761 | |
---|
762 | /* |
---|
763 | * Now, loop through and make sure the items are contiguous and |
---|
764 | * non-overlapping. |
---|
765 | */ |
---|
766 | initem = 0; |
---|
767 | for (i = himark; i < dbp->pgsize; i++) |
---|
768 | if (initem == 0) |
---|
769 | switch (pagelayout[i]) { |
---|
770 | case 0: |
---|
771 | /* May be just for alignment. */ |
---|
772 | if (i != ALIGN(i, sizeof(u_int32_t))) |
---|
773 | continue; |
---|
774 | |
---|
775 | isbad = 1; |
---|
776 | EPRINT((dbp->dbenv, |
---|
777 | "Page %lu: gap between items at offset %lu", |
---|
778 | (u_long)pgno, (u_long)i)); |
---|
779 | /* Find the end of the gap */ |
---|
780 | for ( ; pagelayout[i + 1] == 0 && |
---|
781 | (size_t)(i + 1) < dbp->pgsize; i++) |
---|
782 | ; |
---|
783 | break; |
---|
784 | case ITEM_BEGIN: |
---|
785 | /* We've found an item. Check its alignment. */ |
---|
786 | if (i != ALIGN(i, sizeof(u_int32_t))) { |
---|
787 | isbad = 1; |
---|
788 | EPRINT((dbp->dbenv, |
---|
789 | "Page %lu: offset %lu unaligned", |
---|
790 | (u_long)pgno, (u_long)i)); |
---|
791 | } |
---|
792 | initem = 1; |
---|
793 | nentries++; |
---|
794 | break; |
---|
795 | case ITEM_END: |
---|
796 | /* |
---|
797 | * We've hit the end of an item even though |
---|
798 | * we don't think we're in one; must |
---|
799 | * be an overlap. |
---|
800 | */ |
---|
801 | isbad = 1; |
---|
802 | EPRINT((dbp->dbenv, |
---|
803 | "Page %lu: overlapping items at offset %lu", |
---|
804 | (u_long)pgno, (u_long)i)); |
---|
805 | break; |
---|
806 | default: |
---|
807 | /* Should be impossible. */ |
---|
808 | DB_ASSERT(0); |
---|
809 | ret = EINVAL; |
---|
810 | goto err; |
---|
811 | } |
---|
812 | else |
---|
813 | switch (pagelayout[i]) { |
---|
814 | case 0: |
---|
815 | /* In the middle of an item somewhere. Okay. */ |
---|
816 | break; |
---|
817 | case ITEM_END: |
---|
818 | /* End of an item; switch to out-of-item mode.*/ |
---|
819 | initem = 0; |
---|
820 | break; |
---|
821 | case ITEM_BEGIN: |
---|
822 | /* |
---|
823 | * Hit a second item beginning without an |
---|
824 | * end. Overlap. |
---|
825 | */ |
---|
826 | isbad = 1; |
---|
827 | EPRINT((dbp->dbenv, |
---|
828 | "Page %lu: overlapping items at offset %lu", |
---|
829 | (u_long)pgno, (u_long)i)); |
---|
830 | break; |
---|
831 | } |
---|
832 | |
---|
833 | (void)__os_free(dbp->dbenv, pagelayout); |
---|
834 | |
---|
835 | /* Verify HOFFSET. */ |
---|
836 | if ((db_indx_t)himark != HOFFSET(h)) { |
---|
837 | EPRINT((dbp->dbenv, |
---|
838 | "Page %lu: bad HOFFSET %lu, appears to be %lu", |
---|
839 | (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark)); |
---|
840 | isbad = 1; |
---|
841 | } |
---|
842 | |
---|
843 | err: if (nentriesp != NULL) |
---|
844 | *nentriesp = nentries; |
---|
845 | |
---|
846 | if ((t_ret = |
---|
847 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) |
---|
848 | ret = t_ret; |
---|
849 | |
---|
850 | return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); |
---|
851 | } |
---|
852 | |
---|
853 | /* |
---|
854 | * __bam_vrfy_itemorder -- |
---|
855 | * Make sure the items on a page sort correctly. |
---|
856 | * |
---|
857 | * Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are |
---|
858 | * reasonable; be sure that __bam_vrfy_inp has been called first. |
---|
859 | * |
---|
860 | * If ovflok is set, it also assumes that overflow page chains |
---|
861 | * hanging off the current page have been sanity-checked, and so we |
---|
862 | * can use __bam_cmp to verify their ordering. If it is not set, |
---|
863 | * and we run into an overflow page, carp and return DB_VERIFY_BAD; |
---|
864 | * we shouldn't be called if any exist. |
---|
865 | * |
---|
866 | * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *, |
---|
867 | * PUBLIC: db_pgno_t, u_int32_t, int, int, u_int32_t)); |
---|
868 | */ |
---|
869 | int |
---|
870 | __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags) |
---|
871 | DB *dbp; |
---|
872 | VRFY_DBINFO *vdp; |
---|
873 | PAGE *h; |
---|
874 | db_pgno_t pgno; |
---|
875 | u_int32_t nentries; |
---|
876 | int ovflok, hasdups; |
---|
877 | u_int32_t flags; |
---|
878 | { |
---|
879 | DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp; |
---|
880 | BTREE *bt; |
---|
881 | BINTERNAL *bi; |
---|
882 | BKEYDATA *bk; |
---|
883 | BOVERFLOW *bo; |
---|
884 | VRFY_PAGEINFO *pip; |
---|
885 | db_indx_t i; |
---|
886 | int cmp, freedup_1, freedup_2, isbad, ret, t_ret; |
---|
887 | int (*dupfunc) __P((DB *, const DBT *, const DBT *)); |
---|
888 | int (*func) __P((DB *, const DBT *, const DBT *)); |
---|
889 | void *buf1, *buf2, *tmpbuf; |
---|
890 | |
---|
891 | /* |
---|
892 | * We need to work in the ORDERCHKONLY environment where we might |
---|
893 | * not have a pip, but we also may need to work in contexts where |
---|
894 | * NUM_ENT isn't safe. |
---|
895 | */ |
---|
896 | if (vdp != NULL) { |
---|
897 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
898 | return (ret); |
---|
899 | nentries = pip->entries; |
---|
900 | } else |
---|
901 | pip = NULL; |
---|
902 | |
---|
903 | ret = isbad = 0; |
---|
904 | bo = NULL; /* Shut up compiler. */ |
---|
905 | |
---|
906 | memset(&dbta, 0, sizeof(DBT)); |
---|
907 | F_SET(&dbta, DB_DBT_REALLOC); |
---|
908 | |
---|
909 | memset(&dbtb, 0, sizeof(DBT)); |
---|
910 | F_SET(&dbtb, DB_DBT_REALLOC); |
---|
911 | |
---|
912 | buf1 = buf2 = NULL; |
---|
913 | |
---|
914 | DB_ASSERT(!LF_ISSET(DB_NOORDERCHK)); |
---|
915 | |
---|
916 | dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; |
---|
917 | if (TYPE(h) == P_LDUP) |
---|
918 | func = dupfunc; |
---|
919 | else { |
---|
920 | func = __bam_defcmp; |
---|
921 | if (dbp->bt_internal != NULL) { |
---|
922 | bt = (BTREE *)dbp->bt_internal; |
---|
923 | if (bt->bt_compare != NULL) |
---|
924 | func = bt->bt_compare; |
---|
925 | } |
---|
926 | } |
---|
927 | |
---|
928 | /* |
---|
929 | * We alternate our use of dbta and dbtb so that we can walk |
---|
930 | * through the page key-by-key without copying a dbt twice. |
---|
931 | * p1 is always the dbt for index i - 1, and p2 for index i. |
---|
932 | */ |
---|
933 | p1 = &dbta; |
---|
934 | p2 = &dbtb; |
---|
935 | |
---|
936 | /* |
---|
937 | * Loop through the entries. nentries ought to contain the |
---|
938 | * actual count, and so is a safe way to terminate the loop; whether |
---|
939 | * we inc. by one or two depends on whether we're a leaf page-- |
---|
940 | * on a leaf page, we care only about keys. On internal pages |
---|
941 | * and LDUP pages, we want to check the order of all entries. |
---|
942 | * |
---|
943 | * Note that on IBTREE pages, we start with item 1, since item |
---|
944 | * 0 doesn't get looked at by __bam_cmp. |
---|
945 | */ |
---|
946 | for (i = (TYPE(h) == P_IBTREE) ? 1 : 0; i < nentries; |
---|
947 | i += (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX) { |
---|
948 | /* |
---|
949 | * Put key i-1, now in p2, into p1, by swapping DBTs and bufs. |
---|
950 | */ |
---|
951 | tmp = p1; |
---|
952 | p1 = p2; |
---|
953 | p2 = tmp; |
---|
954 | tmpbuf = buf1; |
---|
955 | buf1 = buf2; |
---|
956 | buf2 = tmpbuf; |
---|
957 | |
---|
958 | /* |
---|
959 | * Get key i into p2. |
---|
960 | */ |
---|
961 | switch (TYPE(h)) { |
---|
962 | case P_IBTREE: |
---|
963 | bi = GET_BINTERNAL(dbp, h, i); |
---|
964 | if (B_TYPE(bi->type) == B_OVERFLOW) { |
---|
965 | bo = (BOVERFLOW *)(bi->data); |
---|
966 | goto overflow; |
---|
967 | } else { |
---|
968 | p2->data = bi->data; |
---|
969 | p2->size = bi->len; |
---|
970 | } |
---|
971 | |
---|
972 | /* |
---|
973 | * The leftmost key on an internal page must be |
---|
974 | * len 0, since it's just a placeholder and |
---|
975 | * automatically sorts less than all keys. |
---|
976 | * |
---|
977 | * XXX |
---|
978 | * This criterion does not currently hold! |
---|
979 | * See todo list item #1686. Meanwhile, it's harmless |
---|
980 | * to just not check for it. |
---|
981 | */ |
---|
982 | #if 0 |
---|
983 | if (i == 0 && bi->len != 0) { |
---|
984 | isbad = 1; |
---|
985 | EPRINT((dbp->dbenv, |
---|
986 | "Page %lu: lowest key on internal page of nonzero length", |
---|
987 | (u_long)pgno)); |
---|
988 | } |
---|
989 | #endif |
---|
990 | break; |
---|
991 | case P_LBTREE: |
---|
992 | case P_LDUP: |
---|
993 | bk = GET_BKEYDATA(dbp, h, i); |
---|
994 | if (B_TYPE(bk->type) == B_OVERFLOW) { |
---|
995 | bo = (BOVERFLOW *)bk; |
---|
996 | goto overflow; |
---|
997 | } else { |
---|
998 | p2->data = bk->data; |
---|
999 | p2->size = bk->len; |
---|
1000 | } |
---|
1001 | break; |
---|
1002 | default: |
---|
1003 | /* |
---|
1004 | * This means our caller screwed up and sent us |
---|
1005 | * an inappropriate page. |
---|
1006 | */ |
---|
1007 | TYPE_ERR_PRINT(dbp->dbenv, |
---|
1008 | "__bam_vrfy_itemorder", pgno, TYPE(h)) |
---|
1009 | DB_ASSERT(0); |
---|
1010 | ret = EINVAL; |
---|
1011 | goto err; |
---|
1012 | } |
---|
1013 | |
---|
1014 | if (0) { |
---|
1015 | /* |
---|
1016 | * If ovflok != 1, we can't safely go chasing |
---|
1017 | * overflow pages with the normal routines now; |
---|
1018 | * they might be unsafe or nonexistent. Mark this |
---|
1019 | * page as incomplete and return. |
---|
1020 | * |
---|
1021 | * Note that we don't need to worry about freeing |
---|
1022 | * buffers, since they can't have been allocated |
---|
1023 | * if overflow items are unsafe. |
---|
1024 | */ |
---|
1025 | overflow: if (!ovflok) { |
---|
1026 | F_SET(pip, VRFY_INCOMPLETE); |
---|
1027 | goto err; |
---|
1028 | } |
---|
1029 | |
---|
1030 | /* |
---|
1031 | * Overflow items are safe to chase. Do so. |
---|
1032 | * Fetch the overflow item into p2->data, |
---|
1033 | * NULLing it or reallocing it as appropriate. |
---|
1034 | * |
---|
1035 | * (We set p2->data to buf2 before the call |
---|
1036 | * so we're sure to realloc if we can and if p2 |
---|
1037 | * was just pointing at a non-overflow item.) |
---|
1038 | */ |
---|
1039 | p2->data = buf2; |
---|
1040 | if ((ret = __db_goff(dbp, |
---|
1041 | p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) { |
---|
1042 | isbad = 1; |
---|
1043 | EPRINT((dbp->dbenv, |
---|
1044 | "Page %lu: error %lu in fetching overflow item %lu", |
---|
1045 | (u_long)pgno, (u_long)ret, (u_long)i)); |
---|
1046 | } |
---|
1047 | /* In case it got realloc'ed and thus changed. */ |
---|
1048 | buf2 = p2->data; |
---|
1049 | } |
---|
1050 | |
---|
1051 | /* Compare with the last key. */ |
---|
1052 | if (p1->data != NULL && p2->data != NULL) { |
---|
1053 | cmp = func(dbp, p1, p2); |
---|
1054 | |
---|
1055 | /* comparison succeeded */ |
---|
1056 | if (cmp > 0) { |
---|
1057 | isbad = 1; |
---|
1058 | EPRINT((dbp->dbenv, |
---|
1059 | "Page %lu: out-of-order key at entry %lu", |
---|
1060 | (u_long)pgno, (u_long)i)); |
---|
1061 | /* proceed */ |
---|
1062 | } else if (cmp == 0) { |
---|
1063 | /* |
---|
1064 | * If they compared equally, this |
---|
1065 | * had better be a (sub)database with dups. |
---|
1066 | * Mark it so we can check during the |
---|
1067 | * structure check. |
---|
1068 | */ |
---|
1069 | if (pip != NULL) |
---|
1070 | F_SET(pip, VRFY_HAS_DUPS); |
---|
1071 | else if (hasdups == 0) { |
---|
1072 | isbad = 1; |
---|
1073 | EPRINT((dbp->dbenv, |
---|
1074 | "Page %lu: database with no duplicates has duplicated keys", |
---|
1075 | (u_long)pgno)); |
---|
1076 | } |
---|
1077 | |
---|
1078 | /* |
---|
1079 | * If we're a btree leaf, check to see |
---|
1080 | * if the data items of these on-page dups are |
---|
1081 | * in sorted order. If not, flag this, so |
---|
1082 | * that we can make sure during the |
---|
1083 | * structure checks that the DUPSORT flag |
---|
1084 | * is unset. |
---|
1085 | * |
---|
1086 | * At this point i points to a duplicate key. |
---|
1087 | * Compare the datum before it (same key) |
---|
1088 | * to the datum after it, i.e. i-1 to i+1. |
---|
1089 | */ |
---|
1090 | if (TYPE(h) == P_LBTREE) { |
---|
1091 | /* |
---|
1092 | * Unsafe; continue and we'll pick |
---|
1093 | * up the bogus nentries later. |
---|
1094 | */ |
---|
1095 | if (i + 1 >= (db_indx_t)nentries) |
---|
1096 | continue; |
---|
1097 | |
---|
1098 | /* |
---|
1099 | * We don't bother with clever memory |
---|
1100 | * management with on-page dups, |
---|
1101 | * as it's only really a big win |
---|
1102 | * in the overflow case, and overflow |
---|
1103 | * dups are probably (?) rare. |
---|
1104 | */ |
---|
1105 | if (((ret = __bam_safe_getdata(dbp, |
---|
1106 | h, i - 1, ovflok, &dup_1, |
---|
1107 | &freedup_1)) != 0) || |
---|
1108 | ((ret = __bam_safe_getdata(dbp, |
---|
1109 | h, i + 1, ovflok, &dup_2, |
---|
1110 | &freedup_2)) != 0)) |
---|
1111 | goto err; |
---|
1112 | |
---|
1113 | /* |
---|
1114 | * If either of the data are NULL, |
---|
1115 | * it's because they're overflows and |
---|
1116 | * it's not safe to chase them now. |
---|
1117 | * Mark an incomplete and return. |
---|
1118 | */ |
---|
1119 | if (dup_1.data == NULL || |
---|
1120 | dup_2.data == NULL) { |
---|
1121 | DB_ASSERT(!ovflok); |
---|
1122 | F_SET(pip, VRFY_INCOMPLETE); |
---|
1123 | goto err; |
---|
1124 | } |
---|
1125 | |
---|
1126 | /* |
---|
1127 | * If the dups are out of order, |
---|
1128 | * flag this. It's not an error |
---|
1129 | * until we do the structure check |
---|
1130 | * and see whether DUPSORT is set. |
---|
1131 | */ |
---|
1132 | if (dupfunc(dbp, &dup_1, &dup_2) > 0) |
---|
1133 | F_SET(pip, VRFY_DUPS_UNSORTED); |
---|
1134 | |
---|
1135 | if (freedup_1) |
---|
1136 | __os_ufree(dbp->dbenv, |
---|
1137 | dup_1.data); |
---|
1138 | if (freedup_2) |
---|
1139 | __os_ufree(dbp->dbenv, |
---|
1140 | dup_2.data); |
---|
1141 | } |
---|
1142 | } |
---|
1143 | } |
---|
1144 | } |
---|
1145 | |
---|
1146 | err: if (pip != NULL && ((t_ret = |
---|
1147 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0) |
---|
1148 | ret = t_ret; |
---|
1149 | |
---|
1150 | if (buf1 != NULL) |
---|
1151 | __os_ufree(dbp->dbenv, buf1); |
---|
1152 | if (buf2 != NULL) |
---|
1153 | __os_ufree(dbp->dbenv, buf2); |
---|
1154 | |
---|
1155 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
1156 | } |
---|
1157 | |
---|
1158 | /* |
---|
1159 | * __bam_vrfy_structure -- |
---|
1160 | * Verify the tree structure of a btree database (including the master |
---|
1161 | * database containing subdbs). |
---|
1162 | * |
---|
1163 | * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, |
---|
1164 | * PUBLIC: u_int32_t)); |
---|
1165 | */ |
---|
1166 | int |
---|
1167 | __bam_vrfy_structure(dbp, vdp, meta_pgno, flags) |
---|
1168 | DB *dbp; |
---|
1169 | VRFY_DBINFO *vdp; |
---|
1170 | db_pgno_t meta_pgno; |
---|
1171 | u_int32_t flags; |
---|
1172 | { |
---|
1173 | DB *pgset; |
---|
1174 | VRFY_PAGEINFO *mip, *rip; |
---|
1175 | db_pgno_t root, p; |
---|
1176 | int t_ret, ret; |
---|
1177 | u_int32_t nrecs, level, relen, stflags; |
---|
1178 | |
---|
1179 | mip = rip = 0; |
---|
1180 | pgset = vdp->pgset; |
---|
1181 | |
---|
1182 | if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0) |
---|
1183 | return (ret); |
---|
1184 | |
---|
1185 | if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, (int *)&p)) != 0) |
---|
1186 | goto err; |
---|
1187 | if (p != 0) { |
---|
1188 | EPRINT((dbp->dbenv, |
---|
1189 | "Page %lu: btree metadata page observed twice", |
---|
1190 | (u_long)meta_pgno)); |
---|
1191 | ret = DB_VERIFY_BAD; |
---|
1192 | goto err; |
---|
1193 | } |
---|
1194 | if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0) |
---|
1195 | goto err; |
---|
1196 | |
---|
1197 | root = mip->root; |
---|
1198 | |
---|
1199 | if (root == 0) { |
---|
1200 | EPRINT((dbp->dbenv, |
---|
1201 | "Page %lu: btree metadata page has no root", |
---|
1202 | (u_long)meta_pgno)); |
---|
1203 | ret = DB_VERIFY_BAD; |
---|
1204 | goto err; |
---|
1205 | } |
---|
1206 | |
---|
1207 | if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0) |
---|
1208 | goto err; |
---|
1209 | |
---|
1210 | switch (rip->type) { |
---|
1211 | case P_IBTREE: |
---|
1212 | case P_LBTREE: |
---|
1213 | stflags = flags | ST_TOPLEVEL; |
---|
1214 | if (F_ISSET(mip, VRFY_HAS_DUPS)) |
---|
1215 | stflags |= ST_DUPOK; |
---|
1216 | if (F_ISSET(mip, VRFY_HAS_DUPSORT)) |
---|
1217 | stflags |= ST_DUPSORT; |
---|
1218 | if (F_ISSET(mip, VRFY_HAS_RECNUMS)) |
---|
1219 | stflags |= ST_RECNUM; |
---|
1220 | ret = __bam_vrfy_subtree(dbp, |
---|
1221 | vdp, root, NULL, NULL, stflags, NULL, NULL, NULL); |
---|
1222 | break; |
---|
1223 | case P_IRECNO: |
---|
1224 | case P_LRECNO: |
---|
1225 | stflags = flags | ST_RECNUM | ST_IS_RECNO | ST_TOPLEVEL; |
---|
1226 | if (mip->re_len > 0) |
---|
1227 | stflags |= ST_RELEN; |
---|
1228 | if ((ret = __bam_vrfy_subtree(dbp, vdp, |
---|
1229 | root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0) |
---|
1230 | goto err; |
---|
1231 | /* |
---|
1232 | * Even if mip->re_len > 0, re_len may come back zero if the |
---|
1233 | * tree is empty. It should be okay to just skip the check in |
---|
1234 | * this case, as if there are any non-deleted keys at all, |
---|
1235 | * that should never happen. |
---|
1236 | */ |
---|
1237 | if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) { |
---|
1238 | EPRINT((dbp->dbenv, |
---|
1239 | "Page %lu: recno database has bad re_len %lu", |
---|
1240 | (u_long)meta_pgno, (u_long)relen)); |
---|
1241 | ret = DB_VERIFY_BAD; |
---|
1242 | goto err; |
---|
1243 | } |
---|
1244 | ret = 0; |
---|
1245 | break; |
---|
1246 | case P_LDUP: |
---|
1247 | EPRINT((dbp->dbenv, |
---|
1248 | "Page %lu: duplicate tree referenced from metadata page", |
---|
1249 | (u_long)meta_pgno)); |
---|
1250 | ret = DB_VERIFY_BAD; |
---|
1251 | break; |
---|
1252 | default: |
---|
1253 | EPRINT((dbp->dbenv, |
---|
1254 | "Page %lu: btree root of incorrect type %lu on metadata page", |
---|
1255 | (u_long)meta_pgno, (u_long)rip->type)); |
---|
1256 | ret = DB_VERIFY_BAD; |
---|
1257 | break; |
---|
1258 | } |
---|
1259 | |
---|
1260 | err: if (mip != NULL && ((t_ret = |
---|
1261 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0) |
---|
1262 | ret = t_ret; |
---|
1263 | if (rip != NULL && ((t_ret = |
---|
1264 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, rip)) != 0) && ret == 0) |
---|
1265 | ret = t_ret; |
---|
1266 | return (ret); |
---|
1267 | } |
---|
1268 | |
---|
1269 | /* |
---|
1270 | * __bam_vrfy_subtree-- |
---|
1271 | * Verify a subtree (or entire) btree with specified root. |
---|
1272 | * |
---|
1273 | * Note that this is public because it must be called to verify |
---|
1274 | * offpage dup trees, including from hash. |
---|
1275 | * |
---|
1276 | * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, |
---|
1277 | * PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); |
---|
1278 | */ |
---|
1279 | int |
---|
1280 | __bam_vrfy_subtree(dbp, |
---|
1281 | vdp, pgno, l, r, flags, levelp, nrecsp, relenp) |
---|
1282 | DB *dbp; |
---|
1283 | VRFY_DBINFO *vdp; |
---|
1284 | db_pgno_t pgno; |
---|
1285 | void *l, *r; |
---|
1286 | u_int32_t flags, *levelp, *nrecsp, *relenp; |
---|
1287 | { |
---|
1288 | BINTERNAL *li, *ri, *lp, *rp; |
---|
1289 | DB *pgset; |
---|
1290 | DB_MPOOLFILE *mpf; |
---|
1291 | DBC *cc; |
---|
1292 | PAGE *h; |
---|
1293 | VRFY_CHILDINFO *child; |
---|
1294 | VRFY_PAGEINFO *pip; |
---|
1295 | db_indx_t i; |
---|
1296 | db_pgno_t next_pgno, prev_pgno; |
---|
1297 | db_recno_t child_nrecs, nrecs; |
---|
1298 | u_int32_t child_level, child_relen, level, relen, stflags; |
---|
1299 | u_int8_t leaf_type; |
---|
1300 | int (*func) __P((DB *, const DBT *, const DBT *)); |
---|
1301 | int isbad, p, ret, t_ret, toplevel; |
---|
1302 | |
---|
1303 | mpf = dbp->mpf; |
---|
1304 | ret = isbad = 0; |
---|
1305 | nrecs = 0; |
---|
1306 | h = NULL; |
---|
1307 | relen = 0; |
---|
1308 | leaf_type = P_INVALID; |
---|
1309 | next_pgno = prev_pgno = PGNO_INVALID; |
---|
1310 | rp = (BINTERNAL *)r; |
---|
1311 | lp = (BINTERNAL *)l; |
---|
1312 | |
---|
1313 | /* Provide feedback on our progress to the application. */ |
---|
1314 | if (!LF_ISSET(DB_SALVAGE)) |
---|
1315 | __db_vrfy_struct_feedback(dbp, vdp); |
---|
1316 | |
---|
1317 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
1318 | return (ret); |
---|
1319 | |
---|
1320 | cc = NULL; |
---|
1321 | level = pip->bt_level; |
---|
1322 | |
---|
1323 | toplevel = LF_ISSET(ST_TOPLEVEL) ? 1 : 0; |
---|
1324 | LF_CLR(ST_TOPLEVEL); |
---|
1325 | |
---|
1326 | /* |
---|
1327 | * If this is the root, initialize the vdp's prev- and next-pgno |
---|
1328 | * accounting. |
---|
1329 | * |
---|
1330 | * For each leaf page we hit, we'll want to make sure that |
---|
1331 | * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is |
---|
1332 | * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno |
---|
1333 | * and vdp->prev_pgno to our page number, and the next leaf page in |
---|
1334 | * line should be able to do the same verification. |
---|
1335 | */ |
---|
1336 | if (toplevel) { |
---|
1337 | /* |
---|
1338 | * Cache the values stored in the vdp so that if we're an |
---|
1339 | * auxiliary tree such as an off-page duplicate set, our |
---|
1340 | * caller's leaf page chain doesn't get lost. |
---|
1341 | */ |
---|
1342 | prev_pgno = vdp->prev_pgno; |
---|
1343 | next_pgno = vdp->next_pgno; |
---|
1344 | leaf_type = vdp->leaf_type; |
---|
1345 | vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID; |
---|
1346 | vdp->leaf_type = P_INVALID; |
---|
1347 | } |
---|
1348 | |
---|
1349 | /* |
---|
1350 | * We are recursively descending a btree, starting from the root |
---|
1351 | * and working our way out to the leaves. |
---|
1352 | * |
---|
1353 | * There are four cases we need to deal with: |
---|
1354 | * 1. pgno is a recno leaf page. Any children are overflows. |
---|
1355 | * 2. pgno is a duplicate leaf page. Any children |
---|
1356 | * are overflow pages; traverse them, and then return |
---|
1357 | * level and nrecs. |
---|
1358 | * 3. pgno is an ordinary leaf page. Check whether dups are |
---|
1359 | * allowed, and if so, traverse any off-page dups or |
---|
1360 | * overflows. Then return nrecs and level. |
---|
1361 | * 4. pgno is a recno internal page. Recursively check any |
---|
1362 | * child pages, making sure their levels are one lower |
---|
1363 | * and their nrecs sum to ours. |
---|
1364 | * 5. pgno is a btree internal page. Same as #4, plus we |
---|
1365 | * must verify that for each pair of BINTERNAL entries |
---|
1366 | * N and N+1, the leftmost item on N's child sorts |
---|
1367 | * greater than N, and the rightmost item on N's child |
---|
1368 | * sorts less than N+1. |
---|
1369 | * |
---|
1370 | * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE), |
---|
1371 | * we need to verify the internal sort order is correct if, |
---|
1372 | * due to overflow items, we were not able to do so earlier. |
---|
1373 | */ |
---|
1374 | switch (pip->type) { |
---|
1375 | case P_LRECNO: |
---|
1376 | case P_LDUP: |
---|
1377 | case P_LBTREE: |
---|
1378 | /* |
---|
1379 | * Cases 1, 2 and 3. |
---|
1380 | * |
---|
1381 | * We're some sort of leaf page; verify |
---|
1382 | * that our linked list of leaves is consistent. |
---|
1383 | */ |
---|
1384 | if (vdp->leaf_type == P_INVALID) { |
---|
1385 | /* |
---|
1386 | * First leaf page. Set the type that all its |
---|
1387 | * successors should be, and verify that our prev_pgno |
---|
1388 | * is PGNO_INVALID. |
---|
1389 | */ |
---|
1390 | vdp->leaf_type = pip->type; |
---|
1391 | if (pip->prev_pgno != PGNO_INVALID) |
---|
1392 | goto bad_prev; |
---|
1393 | } else { |
---|
1394 | /* |
---|
1395 | * Successor leaf page. Check our type, the previous |
---|
1396 | * page's next_pgno, and our prev_pgno. |
---|
1397 | */ |
---|
1398 | if (pip->type != vdp->leaf_type) { |
---|
1399 | EPRINT((dbp->dbenv, |
---|
1400 | "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)", |
---|
1401 | (u_long)pip->pgno, (u_long)pip->type, |
---|
1402 | (u_long)vdp->leaf_type)); |
---|
1403 | isbad = 1; |
---|
1404 | } |
---|
1405 | if (pip->pgno != vdp->next_pgno) { |
---|
1406 | EPRINT((dbp->dbenv, |
---|
1407 | "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)", |
---|
1408 | (u_long)vdp->prev_pgno, |
---|
1409 | (u_long)vdp->next_pgno, (u_long)pip->pgno)); |
---|
1410 | isbad = 1; |
---|
1411 | } |
---|
1412 | if (pip->prev_pgno != vdp->prev_pgno) { |
---|
1413 | bad_prev: EPRINT((dbp->dbenv, |
---|
1414 | "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)", |
---|
1415 | (u_long)pip->pgno, (u_long)pip->prev_pgno, |
---|
1416 | (u_long)vdp->prev_pgno)); |
---|
1417 | isbad = 1; |
---|
1418 | } |
---|
1419 | } |
---|
1420 | vdp->prev_pgno = pip->pgno; |
---|
1421 | vdp->next_pgno = pip->next_pgno; |
---|
1422 | |
---|
1423 | /* |
---|
1424 | * Overflow pages are common to all three leaf types; |
---|
1425 | * traverse the child list, looking for overflows. |
---|
1426 | */ |
---|
1427 | if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) |
---|
1428 | goto err; |
---|
1429 | for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0; |
---|
1430 | ret = __db_vrfy_ccnext(cc, &child)) |
---|
1431 | if (child->type == V_OVERFLOW && |
---|
1432 | (ret = __db_vrfy_ovfl_structure(dbp, vdp, |
---|
1433 | child->pgno, child->tlen, |
---|
1434 | flags | ST_OVFL_LEAF)) != 0) { |
---|
1435 | if (ret == DB_VERIFY_BAD) |
---|
1436 | isbad = 1; |
---|
1437 | else |
---|
1438 | goto done; |
---|
1439 | } |
---|
1440 | |
---|
1441 | if ((ret = __db_vrfy_ccclose(cc)) != 0) |
---|
1442 | goto err; |
---|
1443 | cc = NULL; |
---|
1444 | |
---|
1445 | /* Case 1 */ |
---|
1446 | if (pip->type == P_LRECNO) { |
---|
1447 | if (!LF_ISSET(ST_IS_RECNO) && |
---|
1448 | !(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) { |
---|
1449 | isbad = 1; |
---|
1450 | EPRINT((dbp->dbenv, |
---|
1451 | "Page %lu: recno leaf page non-recno tree", |
---|
1452 | (u_long)pgno)); |
---|
1453 | goto done; |
---|
1454 | } |
---|
1455 | goto leaf; |
---|
1456 | } else if (LF_ISSET(ST_IS_RECNO)) { |
---|
1457 | /* |
---|
1458 | * It's a non-recno leaf. Had better not be a recno |
---|
1459 | * subtree. |
---|
1460 | */ |
---|
1461 | isbad = 1; |
---|
1462 | EPRINT((dbp->dbenv, |
---|
1463 | "Page %lu: non-recno leaf page in recno tree", |
---|
1464 | (u_long)pgno)); |
---|
1465 | goto done; |
---|
1466 | } |
---|
1467 | |
---|
1468 | /* Case 2--no more work. */ |
---|
1469 | if (pip->type == P_LDUP) |
---|
1470 | goto leaf; |
---|
1471 | |
---|
1472 | /* Case 3 */ |
---|
1473 | |
---|
1474 | /* Check if we have any dups. */ |
---|
1475 | if (F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
1476 | /* If dups aren't allowed in this btree, trouble. */ |
---|
1477 | if (!LF_ISSET(ST_DUPOK)) { |
---|
1478 | isbad = 1; |
---|
1479 | EPRINT((dbp->dbenv, |
---|
1480 | "Page %lu: duplicates in non-dup btree", |
---|
1481 | (u_long)pgno)); |
---|
1482 | } else { |
---|
1483 | /* |
---|
1484 | * We correctly have dups. If any are off-page, |
---|
1485 | * traverse those btrees recursively. |
---|
1486 | */ |
---|
1487 | if ((ret = |
---|
1488 | __db_vrfy_childcursor(vdp, &cc)) != 0) |
---|
1489 | goto err; |
---|
1490 | for (ret = __db_vrfy_ccset(cc, pgno, &child); |
---|
1491 | ret == 0; |
---|
1492 | ret = __db_vrfy_ccnext(cc, &child)) { |
---|
1493 | stflags = flags | ST_RECNUM | ST_DUPSET; |
---|
1494 | /* Skip any overflow entries. */ |
---|
1495 | if (child->type == V_DUPLICATE) { |
---|
1496 | if ((ret = __db_vrfy_duptype( |
---|
1497 | dbp, vdp, child->pgno, |
---|
1498 | stflags)) != 0) { |
---|
1499 | isbad = 1; |
---|
1500 | /* Next child. */ |
---|
1501 | continue; |
---|
1502 | } |
---|
1503 | if ((ret = __bam_vrfy_subtree( |
---|
1504 | dbp, vdp, child->pgno, NULL, |
---|
1505 | NULL, stflags | ST_TOPLEVEL, |
---|
1506 | NULL, NULL, NULL)) != 0) { |
---|
1507 | if (ret != |
---|
1508 | DB_VERIFY_BAD) |
---|
1509 | goto err; |
---|
1510 | else |
---|
1511 | isbad = 1; |
---|
1512 | } |
---|
1513 | } |
---|
1514 | } |
---|
1515 | |
---|
1516 | if ((ret = __db_vrfy_ccclose(cc)) != 0) |
---|
1517 | goto err; |
---|
1518 | cc = NULL; |
---|
1519 | |
---|
1520 | /* |
---|
1521 | * If VRFY_DUPS_UNSORTED is set, |
---|
1522 | * ST_DUPSORT had better not be. |
---|
1523 | */ |
---|
1524 | if (F_ISSET(pip, VRFY_DUPS_UNSORTED) && |
---|
1525 | LF_ISSET(ST_DUPSORT)) { |
---|
1526 | EPRINT((dbp->dbenv, |
---|
1527 | "Page %lu: unsorted duplicate set in sorted-dup database", |
---|
1528 | (u_long)pgno)); |
---|
1529 | isbad = 1; |
---|
1530 | } |
---|
1531 | } |
---|
1532 | } |
---|
1533 | goto leaf; |
---|
1534 | case P_IBTREE: |
---|
1535 | case P_IRECNO: |
---|
1536 | /* We handle these below. */ |
---|
1537 | break; |
---|
1538 | default: |
---|
1539 | /* |
---|
1540 | * If a P_IBTREE or P_IRECNO contains a reference to an |
---|
1541 | * invalid page, we'll wind up here; handle it gracefully. |
---|
1542 | * Note that the code at the "done" label assumes that the |
---|
1543 | * current page is a btree/recno one of some sort; this |
---|
1544 | * is not the case here, so we goto err. |
---|
1545 | * |
---|
1546 | * If the page is entirely zeroed, its pip->type will be a lie |
---|
1547 | * (we assumed it was a hash page, as they're allowed to be |
---|
1548 | * zeroed); handle this case specially. |
---|
1549 | */ |
---|
1550 | if (F_ISSET(pip, VRFY_IS_ALLZEROES)) |
---|
1551 | ZEROPG_ERR_PRINT(dbp->dbenv, |
---|
1552 | pgno, "btree or recno page"); |
---|
1553 | else |
---|
1554 | EPRINT((dbp->dbenv, |
---|
1555 | "Page %lu: btree or recno page is of inappropriate type %lu", |
---|
1556 | (u_long)pgno, (u_long)pip->type)); |
---|
1557 | ret = DB_VERIFY_BAD; |
---|
1558 | goto err; |
---|
1559 | } |
---|
1560 | |
---|
1561 | /* |
---|
1562 | * Cases 4 & 5: This is a btree or recno internal page. For each child, |
---|
1563 | * recurse, keeping a running count of nrecs and making sure the level |
---|
1564 | * is always reasonable. |
---|
1565 | */ |
---|
1566 | if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) |
---|
1567 | goto err; |
---|
1568 | for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0; |
---|
1569 | ret = __db_vrfy_ccnext(cc, &child)) |
---|
1570 | if (child->type == V_RECNO) { |
---|
1571 | if (pip->type != P_IRECNO) { |
---|
1572 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_subtree", |
---|
1573 | pgno, pip->type); |
---|
1574 | DB_ASSERT(0); |
---|
1575 | ret = EINVAL; |
---|
1576 | goto err; |
---|
1577 | } |
---|
1578 | if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno, |
---|
1579 | NULL, NULL, flags, &child_level, &child_nrecs, |
---|
1580 | &child_relen)) != 0) { |
---|
1581 | if (ret != DB_VERIFY_BAD) |
---|
1582 | goto done; |
---|
1583 | else |
---|
1584 | isbad = 1; |
---|
1585 | } |
---|
1586 | |
---|
1587 | if (LF_ISSET(ST_RELEN)) { |
---|
1588 | if (relen == 0) |
---|
1589 | relen = child_relen; |
---|
1590 | /* |
---|
1591 | * child_relen may be zero if the child subtree |
---|
1592 | * is empty. |
---|
1593 | */ |
---|
1594 | else if (child_relen > 0 && |
---|
1595 | relen != child_relen) { |
---|
1596 | isbad = 1; |
---|
1597 | EPRINT((dbp->dbenv, |
---|
1598 | "Page %lu: recno page returned bad re_len %lu", |
---|
1599 | (u_long)child->pgno, |
---|
1600 | (u_long)child_relen)); |
---|
1601 | } |
---|
1602 | if (relenp) |
---|
1603 | *relenp = relen; |
---|
1604 | } |
---|
1605 | if (LF_ISSET(ST_RECNUM)) |
---|
1606 | nrecs += child_nrecs; |
---|
1607 | if (level != child_level + 1) { |
---|
1608 | isbad = 1; |
---|
1609 | EPRINT((dbp->dbenv, "Page %lu: recno level incorrect: got %lu, expected %lu", |
---|
1610 | (u_long)child->pgno, (u_long)child_level, |
---|
1611 | (u_long)(level - 1))); |
---|
1612 | } |
---|
1613 | } else if (child->type == V_OVERFLOW && |
---|
1614 | (ret = __db_vrfy_ovfl_structure(dbp, vdp, |
---|
1615 | child->pgno, child->tlen, flags)) != 0) { |
---|
1616 | if (ret == DB_VERIFY_BAD) |
---|
1617 | isbad = 1; |
---|
1618 | else |
---|
1619 | goto done; |
---|
1620 | } |
---|
1621 | |
---|
1622 | if ((ret = __db_vrfy_ccclose(cc)) != 0) |
---|
1623 | goto err; |
---|
1624 | cc = NULL; |
---|
1625 | |
---|
1626 | /* We're done with case 4. */ |
---|
1627 | if (pip->type == P_IRECNO) |
---|
1628 | goto done; |
---|
1629 | |
---|
1630 | /* |
---|
1631 | * Case 5. Btree internal pages. |
---|
1632 | * As described above, we need to iterate through all the |
---|
1633 | * items on the page and make sure that our children sort appropriately |
---|
1634 | * with respect to them. |
---|
1635 | * |
---|
1636 | * For each entry, li will be the "left-hand" key for the entry |
---|
1637 | * itself, which must sort lower than all entries on its child; |
---|
1638 | * ri will be the key to its right, which must sort greater. |
---|
1639 | */ |
---|
1640 | if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) |
---|
1641 | goto err; |
---|
1642 | for (i = 0; i < pip->entries; i += O_INDX) { |
---|
1643 | li = GET_BINTERNAL(dbp, h, i); |
---|
1644 | ri = (i + O_INDX < pip->entries) ? |
---|
1645 | GET_BINTERNAL(dbp, h, i + O_INDX) : NULL; |
---|
1646 | |
---|
1647 | /* |
---|
1648 | * The leftmost key is forcibly sorted less than all entries, |
---|
1649 | * so don't bother passing it. |
---|
1650 | */ |
---|
1651 | if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno, |
---|
1652 | i == 0 ? NULL : li, ri, flags, &child_level, |
---|
1653 | &child_nrecs, NULL)) != 0) { |
---|
1654 | if (ret != DB_VERIFY_BAD) |
---|
1655 | goto done; |
---|
1656 | else |
---|
1657 | isbad = 1; |
---|
1658 | } |
---|
1659 | |
---|
1660 | if (LF_ISSET(ST_RECNUM)) { |
---|
1661 | /* |
---|
1662 | * Keep a running tally on the actual record count so |
---|
1663 | * we can return it to our parent (if we have one) or |
---|
1664 | * compare it to the NRECS field if we're a root page. |
---|
1665 | */ |
---|
1666 | nrecs += child_nrecs; |
---|
1667 | |
---|
1668 | /* |
---|
1669 | * Make sure the actual record count of the child |
---|
1670 | * is equal to the value in the BINTERNAL structure. |
---|
1671 | */ |
---|
1672 | if (li->nrecs != child_nrecs) { |
---|
1673 | isbad = 1; |
---|
1674 | EPRINT((dbp->dbenv, |
---|
1675 | "Page %lu: item %lu has incorrect record count of %lu, should be %lu", |
---|
1676 | (u_long)pgno, (u_long)i, (u_long)li->nrecs, |
---|
1677 | (u_long)child_nrecs)); |
---|
1678 | } |
---|
1679 | } |
---|
1680 | |
---|
1681 | if (level != child_level + 1) { |
---|
1682 | isbad = 1; |
---|
1683 | EPRINT((dbp->dbenv, |
---|
1684 | "Page %lu: Btree level incorrect: got %lu, expected %lu", |
---|
1685 | (u_long)li->pgno, |
---|
1686 | (u_long)child_level, (u_long)(level - 1))); |
---|
1687 | } |
---|
1688 | } |
---|
1689 | |
---|
1690 | if (0) { |
---|
1691 | leaf: level = LEAFLEVEL; |
---|
1692 | if (LF_ISSET(ST_RECNUM)) |
---|
1693 | nrecs = pip->rec_cnt; |
---|
1694 | |
---|
1695 | /* XXX |
---|
1696 | * We should verify that the record count on a leaf page |
---|
1697 | * is the sum of the number of keys and the number of |
---|
1698 | * records in its off-page dups. This requires looking |
---|
1699 | * at the page again, however, and it may all be changing |
---|
1700 | * soon, so for now we don't bother. |
---|
1701 | */ |
---|
1702 | |
---|
1703 | if (LF_ISSET(ST_RELEN) && relenp) |
---|
1704 | *relenp = pip->re_len; |
---|
1705 | } |
---|
1706 | done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { |
---|
1707 | /* |
---|
1708 | * During the page-by-page pass, item order verification was |
---|
1709 | * not finished due to the presence of overflow items. If |
---|
1710 | * isbad == 0, though, it's now safe to do so, as we've |
---|
1711 | * traversed any child overflow pages. Do it. |
---|
1712 | */ |
---|
1713 | if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) |
---|
1714 | goto err; |
---|
1715 | if ((ret = __bam_vrfy_itemorder(dbp, |
---|
1716 | vdp, h, pgno, 0, 1, 0, flags)) != 0) |
---|
1717 | goto err; |
---|
1718 | F_CLR(pip, VRFY_INCOMPLETE); |
---|
1719 | } |
---|
1720 | |
---|
1721 | /* |
---|
1722 | * It's possible to get to this point with a page that has no |
---|
1723 | * items, but without having detected any sort of failure yet. |
---|
1724 | * Having zero items is legal if it's a leaf--it may be the |
---|
1725 | * root page in an empty tree, or the tree may have been |
---|
1726 | * modified with the DB_REVSPLITOFF flag set (there's no way |
---|
1727 | * to tell from what's on disk). For an internal page, |
---|
1728 | * though, having no items is a problem (all internal pages |
---|
1729 | * must have children). |
---|
1730 | */ |
---|
1731 | if (isbad == 0 && ret == 0) { |
---|
1732 | if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) |
---|
1733 | goto err; |
---|
1734 | |
---|
1735 | if (NUM_ENT(h) == 0 && ISINTERNAL(h)) { |
---|
1736 | EPRINT((dbp->dbenv, |
---|
1737 | "Page %lu: internal page is empty and should not be", |
---|
1738 | (u_long)pgno)); |
---|
1739 | isbad = 1; |
---|
1740 | goto err; |
---|
1741 | } |
---|
1742 | } |
---|
1743 | |
---|
1744 | /* |
---|
1745 | * Our parent has sent us BINTERNAL pointers to parent records |
---|
1746 | * so that we can verify our place with respect to them. If it's |
---|
1747 | * appropriate--we have a default sort function--verify this. |
---|
1748 | */ |
---|
1749 | if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) { |
---|
1750 | if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0) |
---|
1751 | goto err; |
---|
1752 | |
---|
1753 | /* |
---|
1754 | * __bam_vrfy_treeorder needs to know what comparison function |
---|
1755 | * to use. If ST_DUPSET is set, we're in a duplicate tree |
---|
1756 | * and we use the duplicate comparison function; otherwise, |
---|
1757 | * use the btree one. If unset, use the default, of course. |
---|
1758 | */ |
---|
1759 | func = LF_ISSET(ST_DUPSET) ? dbp->dup_compare : |
---|
1760 | ((BTREE *)dbp->bt_internal)->bt_compare; |
---|
1761 | if (func == NULL) |
---|
1762 | func = __bam_defcmp; |
---|
1763 | |
---|
1764 | if ((ret = __bam_vrfy_treeorder( |
---|
1765 | dbp, pgno, h, lp, rp, func, flags)) != 0) { |
---|
1766 | if (ret == DB_VERIFY_BAD) |
---|
1767 | isbad = 1; |
---|
1768 | else |
---|
1769 | goto err; |
---|
1770 | } |
---|
1771 | } |
---|
1772 | |
---|
1773 | /* |
---|
1774 | * This is guaranteed to succeed for leaf pages, but no harm done. |
---|
1775 | * |
---|
1776 | * Internal pages below the top level do not store their own |
---|
1777 | * record numbers, so we skip them. |
---|
1778 | */ |
---|
1779 | if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) { |
---|
1780 | isbad = 1; |
---|
1781 | EPRINT((dbp->dbenv, |
---|
1782 | "Page %lu: bad record count: has %lu records, claims %lu", |
---|
1783 | (u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt)); |
---|
1784 | } |
---|
1785 | |
---|
1786 | if (levelp) |
---|
1787 | *levelp = level; |
---|
1788 | if (nrecsp) |
---|
1789 | *nrecsp = nrecs; |
---|
1790 | |
---|
1791 | pgset = vdp->pgset; |
---|
1792 | if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0) |
---|
1793 | goto err; |
---|
1794 | if (p != 0) { |
---|
1795 | isbad = 1; |
---|
1796 | EPRINT((dbp->dbenv, "Page %lu: linked twice", (u_long)pgno)); |
---|
1797 | } else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0) |
---|
1798 | goto err; |
---|
1799 | |
---|
1800 | if (toplevel) |
---|
1801 | /* |
---|
1802 | * The last page's next_pgno in the leaf chain should have been |
---|
1803 | * PGNO_INVALID. |
---|
1804 | */ |
---|
1805 | if (vdp->next_pgno != PGNO_INVALID) { |
---|
1806 | EPRINT((dbp->dbenv, "Page %lu: unterminated leaf chain", |
---|
1807 | (u_long)vdp->prev_pgno)); |
---|
1808 | isbad = 1; |
---|
1809 | } |
---|
1810 | |
---|
1811 | err: if (toplevel) { |
---|
1812 | /* Restore our caller's settings. */ |
---|
1813 | vdp->next_pgno = next_pgno; |
---|
1814 | vdp->prev_pgno = prev_pgno; |
---|
1815 | vdp->leaf_type = leaf_type; |
---|
1816 | } |
---|
1817 | |
---|
1818 | if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0) |
---|
1819 | ret = t_ret; |
---|
1820 | if ((t_ret = |
---|
1821 | __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) |
---|
1822 | ret = t_ret; |
---|
1823 | if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) |
---|
1824 | ret = t_ret; |
---|
1825 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
1826 | } |
---|
1827 | |
---|
1828 | /* |
---|
1829 | * __bam_vrfy_treeorder -- |
---|
1830 | * Verify that the lowest key on a page sorts greater than the |
---|
1831 | * BINTERNAL which points to it (lp), and the highest key |
---|
1832 | * sorts less than the BINTERNAL above that (rp). |
---|
1833 | * |
---|
1834 | * If lp is NULL, this means that it was the leftmost key on the |
---|
1835 | * parent, which (regardless of sort function) sorts less than |
---|
1836 | * all keys. No need to check it. |
---|
1837 | * |
---|
1838 | * If rp is NULL, lp was the highest key on the parent, so there's |
---|
1839 | * no higher key we must sort less than. |
---|
1840 | */ |
---|
1841 | static int |
---|
1842 | __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags) |
---|
1843 | DB *dbp; |
---|
1844 | db_pgno_t pgno; |
---|
1845 | PAGE *h; |
---|
1846 | BINTERNAL *lp, *rp; |
---|
1847 | int (*func) __P((DB *, const DBT *, const DBT *)); |
---|
1848 | u_int32_t flags; |
---|
1849 | { |
---|
1850 | BOVERFLOW *bo; |
---|
1851 | DBT dbt; |
---|
1852 | db_indx_t last; |
---|
1853 | int ret, cmp; |
---|
1854 | |
---|
1855 | memset(&dbt, 0, sizeof(DBT)); |
---|
1856 | F_SET(&dbt, DB_DBT_MALLOC); |
---|
1857 | ret = 0; |
---|
1858 | |
---|
1859 | /* |
---|
1860 | * Empty pages are sorted correctly by definition. We check |
---|
1861 | * to see whether they ought to be empty elsewhere; leaf |
---|
1862 | * pages legally may be. |
---|
1863 | */ |
---|
1864 | if (NUM_ENT(h) == 0) |
---|
1865 | return (0); |
---|
1866 | |
---|
1867 | switch (TYPE(h)) { |
---|
1868 | case P_IBTREE: |
---|
1869 | case P_LDUP: |
---|
1870 | last = NUM_ENT(h) - O_INDX; |
---|
1871 | break; |
---|
1872 | case P_LBTREE: |
---|
1873 | last = NUM_ENT(h) - P_INDX; |
---|
1874 | break; |
---|
1875 | default: |
---|
1876 | TYPE_ERR_PRINT(dbp->dbenv, |
---|
1877 | "__bam_vrfy_treeorder", pgno, TYPE(h)); |
---|
1878 | DB_ASSERT(0); |
---|
1879 | return (EINVAL); |
---|
1880 | } |
---|
1881 | |
---|
1882 | /* |
---|
1883 | * The key on page h, the child page, is more likely to be |
---|
1884 | * an overflow page, so we pass its offset, rather than lp/rp's, |
---|
1885 | * into __bam_cmp. This will take advantage of __db_moff. |
---|
1886 | */ |
---|
1887 | |
---|
1888 | /* |
---|
1889 | * Skip first-item check if we're an internal page--the first |
---|
1890 | * entry on an internal page is treated specially by __bam_cmp, |
---|
1891 | * so what's on the page shouldn't matter. (Plus, since we're passing |
---|
1892 | * our page and item 0 as to __bam_cmp, we'll sort before our |
---|
1893 | * parent and falsely report a failure.) |
---|
1894 | */ |
---|
1895 | if (lp != NULL && TYPE(h) != P_IBTREE) { |
---|
1896 | if (lp->type == B_KEYDATA) { |
---|
1897 | dbt.data = lp->data; |
---|
1898 | dbt.size = lp->len; |
---|
1899 | } else if (lp->type == B_OVERFLOW) { |
---|
1900 | bo = (BOVERFLOW *)lp->data; |
---|
1901 | if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno, |
---|
1902 | NULL, NULL)) != 0) |
---|
1903 | return (ret); |
---|
1904 | } else { |
---|
1905 | DB_ASSERT(0); |
---|
1906 | EPRINT((dbp->dbenv, |
---|
1907 | "Page %lu: unknown type for internal record", |
---|
1908 | (u_long)PGNO(h))); |
---|
1909 | return (EINVAL); |
---|
1910 | } |
---|
1911 | |
---|
1912 | /* On error, fall through, free if neeeded, and return. */ |
---|
1913 | if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) { |
---|
1914 | if (cmp > 0) { |
---|
1915 | EPRINT((dbp->dbenv, |
---|
1916 | "Page %lu: first item on page sorted greater than parent entry", |
---|
1917 | (u_long)PGNO(h))); |
---|
1918 | ret = DB_VERIFY_BAD; |
---|
1919 | } |
---|
1920 | } else |
---|
1921 | EPRINT((dbp->dbenv, |
---|
1922 | "Page %lu: first item on page had comparison error", |
---|
1923 | (u_long)PGNO(h))); |
---|
1924 | |
---|
1925 | if (dbt.data != lp->data) |
---|
1926 | __os_ufree(dbp->dbenv, dbt.data); |
---|
1927 | if (ret != 0) |
---|
1928 | return (ret); |
---|
1929 | } |
---|
1930 | |
---|
1931 | if (rp != NULL) { |
---|
1932 | if (rp->type == B_KEYDATA) { |
---|
1933 | dbt.data = rp->data; |
---|
1934 | dbt.size = rp->len; |
---|
1935 | } else if (rp->type == B_OVERFLOW) { |
---|
1936 | bo = (BOVERFLOW *)rp->data; |
---|
1937 | if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno, |
---|
1938 | NULL, NULL)) != 0) |
---|
1939 | return (ret); |
---|
1940 | } else { |
---|
1941 | DB_ASSERT(0); |
---|
1942 | EPRINT((dbp->dbenv, |
---|
1943 | "Page %lu: unknown type for internal record", |
---|
1944 | (u_long)PGNO(h))); |
---|
1945 | return (EINVAL); |
---|
1946 | } |
---|
1947 | |
---|
1948 | /* On error, fall through, free if neeeded, and return. */ |
---|
1949 | if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) { |
---|
1950 | if (cmp < 0) { |
---|
1951 | EPRINT((dbp->dbenv, |
---|
1952 | "Page %lu: last item on page sorted greater than parent entry", |
---|
1953 | (u_long)PGNO(h))); |
---|
1954 | ret = DB_VERIFY_BAD; |
---|
1955 | } |
---|
1956 | } else |
---|
1957 | EPRINT((dbp->dbenv, |
---|
1958 | "Page %lu: last item on page had comparison error", |
---|
1959 | (u_long)PGNO(h))); |
---|
1960 | |
---|
1961 | if (dbt.data != rp->data) |
---|
1962 | __os_ufree(dbp->dbenv, dbt.data); |
---|
1963 | } |
---|
1964 | |
---|
1965 | return (ret); |
---|
1966 | } |
---|
1967 | |
---|
1968 | /* |
---|
1969 | * __bam_salvage -- |
---|
1970 | * Safely dump out anything that looks like a key on an alleged |
---|
1971 | * btree leaf page. |
---|
1972 | * |
---|
1973 | * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, |
---|
1974 | * PUBLIC: PAGE *, void *, int (*)(void *, const void *), DBT *, |
---|
1975 | * PUBLIC: u_int32_t)); |
---|
1976 | */ |
---|
1977 | int |
---|
1978 | __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) |
---|
1979 | DB *dbp; |
---|
1980 | VRFY_DBINFO *vdp; |
---|
1981 | db_pgno_t pgno; |
---|
1982 | u_int32_t pgtype; |
---|
1983 | PAGE *h; |
---|
1984 | void *handle; |
---|
1985 | int (*callback) __P((void *, const void *)); |
---|
1986 | DBT *key; |
---|
1987 | u_int32_t flags; |
---|
1988 | { |
---|
1989 | DBT dbt, unkdbt; |
---|
1990 | BKEYDATA *bk; |
---|
1991 | BOVERFLOW *bo; |
---|
1992 | db_indx_t i, beg, end, *inp; |
---|
1993 | u_int32_t himark; |
---|
1994 | u_int8_t *pgmap; |
---|
1995 | void *ovflbuf; |
---|
1996 | int t_ret, ret, err_ret; |
---|
1997 | |
---|
1998 | /* Shut up lint. */ |
---|
1999 | COMPQUIET(end, 0); |
---|
2000 | |
---|
2001 | ovflbuf = pgmap = NULL; |
---|
2002 | err_ret = ret = 0; |
---|
2003 | inp = P_INP(dbp, h); |
---|
2004 | |
---|
2005 | memset(&dbt, 0, sizeof(DBT)); |
---|
2006 | dbt.flags = DB_DBT_REALLOC; |
---|
2007 | |
---|
2008 | memset(&unkdbt, 0, sizeof(DBT)); |
---|
2009 | unkdbt.size = (u_int32_t)(strlen("UNKNOWN") + 1); |
---|
2010 | unkdbt.data = "UNKNOWN"; |
---|
2011 | |
---|
2012 | /* |
---|
2013 | * Allocate a buffer for overflow items. Start at one page; |
---|
2014 | * __db_safe_goff will realloc as needed. |
---|
2015 | */ |
---|
2016 | if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &ovflbuf)) != 0) |
---|
2017 | return (ret); |
---|
2018 | |
---|
2019 | if (LF_ISSET(DB_AGGRESSIVE)) { |
---|
2020 | if ((ret = |
---|
2021 | __os_malloc(dbp->dbenv, dbp->pgsize, &pgmap)) != 0) |
---|
2022 | goto err; |
---|
2023 | memset(pgmap, 0, dbp->pgsize); |
---|
2024 | } |
---|
2025 | |
---|
2026 | /* |
---|
2027 | * Loop through the inp array, spitting out key/data pairs. |
---|
2028 | * |
---|
2029 | * If we're salvaging normally, loop from 0 through NUM_ENT(h). |
---|
2030 | * If we're being aggressive, loop until we hit the end of the page-- |
---|
2031 | * NUM_ENT() may be bogus. |
---|
2032 | */ |
---|
2033 | himark = dbp->pgsize; |
---|
2034 | for (i = 0;; i += O_INDX) { |
---|
2035 | /* If we're not aggressive, break when we hit NUM_ENT(h). */ |
---|
2036 | if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) |
---|
2037 | break; |
---|
2038 | |
---|
2039 | /* Verify the current item. */ |
---|
2040 | ret = __db_vrfy_inpitem(dbp, |
---|
2041 | h, pgno, i, 1, flags, &himark, NULL); |
---|
2042 | /* If this returned a fatality, it's time to break. */ |
---|
2043 | if (ret == DB_VERIFY_FATAL) { |
---|
2044 | /* |
---|
2045 | * Don't return DB_VERIFY_FATAL; it's private |
---|
2046 | * and means only that we can't go on with this |
---|
2047 | * page, not with the whole database. It's |
---|
2048 | * not even an error if we've run into it |
---|
2049 | * after NUM_ENT(h). |
---|
2050 | */ |
---|
2051 | ret = (i < NUM_ENT(h)) ? DB_VERIFY_BAD : 0; |
---|
2052 | break; |
---|
2053 | } |
---|
2054 | |
---|
2055 | /* |
---|
2056 | * If this returned 0, it's safe to print or (carefully) |
---|
2057 | * try to fetch. |
---|
2058 | */ |
---|
2059 | if (ret == 0) { |
---|
2060 | /* |
---|
2061 | * We only want to print deleted items if |
---|
2062 | * DB_AGGRESSIVE is set. |
---|
2063 | */ |
---|
2064 | bk = GET_BKEYDATA(dbp, h, i); |
---|
2065 | if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type)) |
---|
2066 | continue; |
---|
2067 | |
---|
2068 | /* |
---|
2069 | * We're going to go try to print the next item. If |
---|
2070 | * key is non-NULL, we're a dup page, so we've got to |
---|
2071 | * print the key first, unless SA_SKIPFIRSTKEY is set |
---|
2072 | * and we're on the first entry. |
---|
2073 | */ |
---|
2074 | if (key != NULL && |
---|
2075 | (i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY))) |
---|
2076 | if ((ret = __db_prdbt(key, |
---|
2077 | 0, " ", handle, callback, 0, vdp)) != 0) |
---|
2078 | err_ret = ret; |
---|
2079 | |
---|
2080 | beg = inp[i]; |
---|
2081 | switch (B_TYPE(bk->type)) { |
---|
2082 | case B_DUPLICATE: |
---|
2083 | end = beg + BOVERFLOW_SIZE - 1; |
---|
2084 | /* |
---|
2085 | * If we're not on a normal btree leaf page, |
---|
2086 | * there shouldn't be off-page |
---|
2087 | * dup sets. Something's confused; just |
---|
2088 | * drop it, and the code to pick up unlinked |
---|
2089 | * offpage dup sets will print it out |
---|
2090 | * with key "UNKNOWN" later. |
---|
2091 | */ |
---|
2092 | if (pgtype != P_LBTREE) |
---|
2093 | break; |
---|
2094 | |
---|
2095 | bo = (BOVERFLOW *)bk; |
---|
2096 | |
---|
2097 | /* |
---|
2098 | * If the page number is unreasonable, or |
---|
2099 | * if this is supposed to be a key item, |
---|
2100 | * just spit out "UNKNOWN"--the best we |
---|
2101 | * can do is run into the data items in the |
---|
2102 | * unlinked offpage dup pass. |
---|
2103 | */ |
---|
2104 | if (!IS_VALID_PGNO(bo->pgno) || |
---|
2105 | (i % P_INDX == 0)) { |
---|
2106 | /* Not much to do on failure. */ |
---|
2107 | if ((ret = __db_prdbt(&unkdbt, 0, " ", |
---|
2108 | handle, callback, 0, vdp)) != 0) |
---|
2109 | err_ret = ret; |
---|
2110 | break; |
---|
2111 | } |
---|
2112 | |
---|
2113 | if ((ret = __db_salvage_duptree(dbp, |
---|
2114 | vdp, bo->pgno, &dbt, handle, callback, |
---|
2115 | flags | SA_SKIPFIRSTKEY)) != 0) |
---|
2116 | err_ret = ret; |
---|
2117 | |
---|
2118 | break; |
---|
2119 | case B_KEYDATA: |
---|
2120 | end = |
---|
2121 | ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1; |
---|
2122 | dbt.data = bk->data; |
---|
2123 | dbt.size = bk->len; |
---|
2124 | if ((ret = __db_prdbt(&dbt, |
---|
2125 | 0, " ", handle, callback, 0, vdp)) != 0) |
---|
2126 | err_ret = ret; |
---|
2127 | break; |
---|
2128 | case B_OVERFLOW: |
---|
2129 | end = beg + BOVERFLOW_SIZE - 1; |
---|
2130 | bo = (BOVERFLOW *)bk; |
---|
2131 | if ((ret = __db_safe_goff(dbp, vdp, |
---|
2132 | bo->pgno, &dbt, &ovflbuf, flags)) != 0) { |
---|
2133 | err_ret = ret; |
---|
2134 | /* We care about err_ret more. */ |
---|
2135 | (void)__db_prdbt(&unkdbt, 0, " ", |
---|
2136 | handle, callback, 0, vdp); |
---|
2137 | break; |
---|
2138 | } |
---|
2139 | if ((ret = __db_prdbt(&dbt, |
---|
2140 | 0, " ", handle, callback, 0, vdp)) != 0) |
---|
2141 | err_ret = ret; |
---|
2142 | break; |
---|
2143 | default: |
---|
2144 | /* |
---|
2145 | * We should never get here; __db_vrfy_inpitem |
---|
2146 | * should not be returning 0 if bk->type |
---|
2147 | * is unrecognizable. |
---|
2148 | */ |
---|
2149 | DB_ASSERT(0); |
---|
2150 | return (EINVAL); |
---|
2151 | } |
---|
2152 | |
---|
2153 | /* |
---|
2154 | * If we're being aggressive, mark the beginning |
---|
2155 | * and end of the item; we'll come back and print |
---|
2156 | * whatever "junk" is in the gaps in case we had |
---|
2157 | * any bogus inp elements and thereby missed stuff. |
---|
2158 | */ |
---|
2159 | if (LF_ISSET(DB_AGGRESSIVE)) { |
---|
2160 | pgmap[beg] = ITEM_BEGIN; |
---|
2161 | pgmap[end] = ITEM_END; |
---|
2162 | } |
---|
2163 | } |
---|
2164 | } |
---|
2165 | |
---|
2166 | /* |
---|
2167 | * If i is odd and this is a btree leaf, we've printed out a key but not |
---|
2168 | * a datum; fix this imbalance by printing an "UNKNOWN". |
---|
2169 | */ |
---|
2170 | if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret = |
---|
2171 | __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp)) != 0)) |
---|
2172 | err_ret = ret; |
---|
2173 | |
---|
2174 | err: if (pgmap != NULL) |
---|
2175 | __os_free(dbp->dbenv, pgmap); |
---|
2176 | __os_free(dbp->dbenv, ovflbuf); |
---|
2177 | |
---|
2178 | /* Mark this page as done. */ |
---|
2179 | if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) |
---|
2180 | return (t_ret); |
---|
2181 | |
---|
2182 | return ((err_ret != 0) ? err_ret : ret); |
---|
2183 | } |
---|
2184 | |
---|
2185 | /* |
---|
2186 | * __bam_salvage_walkdupint -- |
---|
2187 | * Walk a known-good btree or recno internal page which is part of |
---|
2188 | * a dup tree, calling __db_salvage_duptree on each child page. |
---|
2189 | * |
---|
2190 | * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, |
---|
2191 | * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t)); |
---|
2192 | */ |
---|
2193 | int |
---|
2194 | __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags) |
---|
2195 | DB *dbp; |
---|
2196 | VRFY_DBINFO *vdp; |
---|
2197 | PAGE *h; |
---|
2198 | DBT *key; |
---|
2199 | void *handle; |
---|
2200 | int (*callback) __P((void *, const void *)); |
---|
2201 | u_int32_t flags; |
---|
2202 | { |
---|
2203 | RINTERNAL *ri; |
---|
2204 | BINTERNAL *bi; |
---|
2205 | int ret, t_ret; |
---|
2206 | db_indx_t i; |
---|
2207 | |
---|
2208 | ret = 0; |
---|
2209 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
2210 | switch (TYPE(h)) { |
---|
2211 | case P_IBTREE: |
---|
2212 | bi = GET_BINTERNAL(dbp, h, i); |
---|
2213 | if ((t_ret = __db_salvage_duptree(dbp, |
---|
2214 | vdp, bi->pgno, key, handle, callback, flags)) != 0) |
---|
2215 | ret = t_ret; |
---|
2216 | break; |
---|
2217 | case P_IRECNO: |
---|
2218 | ri = GET_RINTERNAL(dbp, h, i); |
---|
2219 | if ((t_ret = __db_salvage_duptree(dbp, |
---|
2220 | vdp, ri->pgno, key, handle, callback, flags)) != 0) |
---|
2221 | ret = t_ret; |
---|
2222 | break; |
---|
2223 | default: |
---|
2224 | __db_err(dbp->dbenv, |
---|
2225 | "__bam_salvage_walkdupint called on non-int. page"); |
---|
2226 | DB_ASSERT(0); |
---|
2227 | return (EINVAL); |
---|
2228 | } |
---|
2229 | /* Pass SA_SKIPFIRSTKEY, if set, on to the 0th child only. */ |
---|
2230 | flags &= ~LF_ISSET(SA_SKIPFIRSTKEY); |
---|
2231 | } |
---|
2232 | |
---|
2233 | return (ret); |
---|
2234 | } |
---|
2235 | |
---|
2236 | /* |
---|
2237 | * __bam_meta2pgset -- |
---|
2238 | * Given a known-good meta page, return in pgsetp a 0-terminated list of |
---|
2239 | * db_pgno_t's corresponding to the pages in the btree. |
---|
2240 | * |
---|
2241 | * We do this by a somewhat sleazy method, to avoid having to traverse the |
---|
2242 | * btree structure neatly: we walk down the left side to the very |
---|
2243 | * first leaf page, then we mark all the pages in the chain of |
---|
2244 | * NEXT_PGNOs (being wary of cycles and invalid ones), then we |
---|
2245 | * consolidate our scratch array into a nice list, and return. This |
---|
2246 | * avoids the memory management hassles of recursion and the |
---|
2247 | * trouble of walking internal pages--they just don't matter, except |
---|
2248 | * for the left branch. |
---|
2249 | * |
---|
2250 | * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, |
---|
2251 | * PUBLIC: u_int32_t, DB *)); |
---|
2252 | */ |
---|
2253 | int |
---|
2254 | __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) |
---|
2255 | DB *dbp; |
---|
2256 | VRFY_DBINFO *vdp; |
---|
2257 | BTMETA *btmeta; |
---|
2258 | u_int32_t flags; |
---|
2259 | DB *pgset; |
---|
2260 | { |
---|
2261 | BINTERNAL *bi; |
---|
2262 | DB_MPOOLFILE *mpf; |
---|
2263 | PAGE *h; |
---|
2264 | RINTERNAL *ri; |
---|
2265 | db_pgno_t current, p; |
---|
2266 | int err_ret, ret; |
---|
2267 | |
---|
2268 | mpf = dbp->mpf; |
---|
2269 | h = NULL; |
---|
2270 | ret = err_ret = 0; |
---|
2271 | DB_ASSERT(pgset != NULL); |
---|
2272 | for (current = btmeta->root;;) { |
---|
2273 | if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) { |
---|
2274 | err_ret = DB_VERIFY_BAD; |
---|
2275 | goto err; |
---|
2276 | } |
---|
2277 | if ((ret = mpf->get(mpf, ¤t, 0, &h)) != 0) { |
---|
2278 | err_ret = ret; |
---|
2279 | goto err; |
---|
2280 | } |
---|
2281 | |
---|
2282 | switch (TYPE(h)) { |
---|
2283 | case P_IBTREE: |
---|
2284 | case P_IRECNO: |
---|
2285 | if ((ret = __bam_vrfy(dbp, |
---|
2286 | vdp, h, current, flags | DB_NOORDERCHK)) != 0) { |
---|
2287 | err_ret = ret; |
---|
2288 | goto err; |
---|
2289 | } |
---|
2290 | if (TYPE(h) == P_IBTREE) { |
---|
2291 | bi = GET_BINTERNAL(dbp, h, 0); |
---|
2292 | current = bi->pgno; |
---|
2293 | } else { /* P_IRECNO */ |
---|
2294 | ri = GET_RINTERNAL(dbp, h, 0); |
---|
2295 | current = ri->pgno; |
---|
2296 | } |
---|
2297 | break; |
---|
2298 | case P_LBTREE: |
---|
2299 | case P_LRECNO: |
---|
2300 | goto traverse; |
---|
2301 | default: |
---|
2302 | err_ret = DB_VERIFY_BAD; |
---|
2303 | goto err; |
---|
2304 | } |
---|
2305 | |
---|
2306 | if ((ret = mpf->put(mpf, h, 0)) != 0) |
---|
2307 | err_ret = ret; |
---|
2308 | h = NULL; |
---|
2309 | } |
---|
2310 | |
---|
2311 | /* |
---|
2312 | * At this point, current is the pgno of leaf page h, the 0th in the |
---|
2313 | * tree we're concerned with. |
---|
2314 | */ |
---|
2315 | traverse: |
---|
2316 | while (IS_VALID_PGNO(current) && current != PGNO_INVALID) { |
---|
2317 | if (h == NULL && (ret = mpf->get(mpf, ¤t, 0, &h)) != 0) { |
---|
2318 | err_ret = ret; |
---|
2319 | break; |
---|
2320 | } |
---|
2321 | |
---|
2322 | if ((ret = __db_vrfy_pgset_get(pgset, current, (int *)&p)) != 0) |
---|
2323 | goto err; |
---|
2324 | |
---|
2325 | if (p != 0) { |
---|
2326 | /* |
---|
2327 | * We've found a cycle. Return success anyway-- |
---|
2328 | * our caller may as well use however much of |
---|
2329 | * the pgset we've come up with. |
---|
2330 | */ |
---|
2331 | break; |
---|
2332 | } |
---|
2333 | if ((ret = __db_vrfy_pgset_inc(pgset, current)) != 0) |
---|
2334 | goto err; |
---|
2335 | |
---|
2336 | current = NEXT_PGNO(h); |
---|
2337 | if ((ret = mpf->put(mpf, h, 0)) != 0) |
---|
2338 | err_ret = ret; |
---|
2339 | h = NULL; |
---|
2340 | } |
---|
2341 | |
---|
2342 | err: if (h != NULL) |
---|
2343 | (void)mpf->put(mpf, h, 0); |
---|
2344 | |
---|
2345 | return (ret == 0 ? err_ret : ret); |
---|
2346 | } |
---|
2347 | |
---|
2348 | /* |
---|
2349 | * __bam_safe_getdata -- |
---|
2350 | * |
---|
2351 | * Utility function for __bam_vrfy_itemorder. Safely gets the datum at |
---|
2352 | * index i, page h, and sticks it in DBT dbt. If ovflok is 1 and i's an |
---|
2353 | * overflow item, we do a safe_goff to get the item and signal that we need |
---|
2354 | * to free dbt->data; if ovflok is 0, we leaves the DBT zeroed. |
---|
2355 | */ |
---|
2356 | static int |
---|
2357 | __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp) |
---|
2358 | DB *dbp; |
---|
2359 | PAGE *h; |
---|
2360 | u_int32_t i; |
---|
2361 | int ovflok; |
---|
2362 | DBT *dbt; |
---|
2363 | int *freedbtp; |
---|
2364 | { |
---|
2365 | BKEYDATA *bk; |
---|
2366 | BOVERFLOW *bo; |
---|
2367 | |
---|
2368 | memset(dbt, 0, sizeof(DBT)); |
---|
2369 | *freedbtp = 0; |
---|
2370 | |
---|
2371 | bk = GET_BKEYDATA(dbp, h, i); |
---|
2372 | if (B_TYPE(bk->type) == B_OVERFLOW) { |
---|
2373 | if (!ovflok) |
---|
2374 | return (0); |
---|
2375 | |
---|
2376 | bo = (BOVERFLOW *)bk; |
---|
2377 | F_SET(dbt, DB_DBT_MALLOC); |
---|
2378 | |
---|
2379 | *freedbtp = 1; |
---|
2380 | return (__db_goff(dbp, dbt, bo->tlen, bo->pgno, NULL, NULL)); |
---|
2381 | } else { |
---|
2382 | dbt->data = bk->data; |
---|
2383 | dbt->size = bk->len; |
---|
2384 | } |
---|
2385 | |
---|
2386 | return (0); |
---|
2387 | } |
---|