1 | /*- |
---|
2 | * See the file LICENSE for redistribution information. |
---|
3 | * |
---|
4 | * Copyright (c) 1999, 2000 |
---|
5 | * Sleepycat Software. All rights reserved. |
---|
6 | * |
---|
7 | * $Id: bt_verify.c,v 1.1.1.1 2002-02-11 16:30:14 ghudson Exp $ |
---|
8 | */ |
---|
9 | |
---|
10 | #include "db_config.h" |
---|
11 | |
---|
12 | #ifndef lint |
---|
13 | static const char revid[] = "$Id: bt_verify.c,v 1.1.1.1 2002-02-11 16:30:14 ghudson Exp $"; |
---|
14 | #endif /* not lint */ |
---|
15 | |
---|
16 | #ifndef NO_SYSTEM_INCLUDES |
---|
17 | #include <sys/types.h> |
---|
18 | |
---|
19 | #include <errno.h> |
---|
20 | #include <string.h> |
---|
21 | #endif |
---|
22 | |
---|
23 | #include "db_int.h" |
---|
24 | #include "db_page.h" |
---|
25 | #include "db_verify.h" |
---|
26 | #include "btree.h" |
---|
27 | |
---|
28 | static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *)); |
---|
29 | static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
30 | db_indx_t *, u_int32_t)); |
---|
31 | static int __bam_vrfy_treeorder __P((DB *, db_pgno_t, PAGE *, BINTERNAL *, |
---|
32 | BINTERNAL *, u_int32_t)); |
---|
33 | static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
34 | db_indx_t *, u_int32_t)); |
---|
35 | |
---|
36 | #define OKFLAGS (DB_AGGRESSIVE | DB_NOORDERCHK | DB_SALVAGE) |
---|
37 | |
---|
38 | /* |
---|
39 | * __bam_vrfy_meta -- |
---|
40 | * Verify the btree-specific part of a metadata page. |
---|
41 | * |
---|
42 | * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, |
---|
43 | * PUBLIC: db_pgno_t, u_int32_t)); |
---|
44 | */ |
---|
45 | int |
---|
46 | __bam_vrfy_meta(dbp, vdp, meta, pgno, flags) |
---|
47 | DB *dbp; |
---|
48 | VRFY_DBINFO *vdp; |
---|
49 | BTMETA *meta; |
---|
50 | db_pgno_t pgno; |
---|
51 | u_int32_t flags; |
---|
52 | { |
---|
53 | VRFY_PAGEINFO *pip; |
---|
54 | int isbad, t_ret, ret; |
---|
55 | db_indx_t ovflsize; |
---|
56 | |
---|
57 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
58 | return (ret); |
---|
59 | |
---|
60 | isbad = 0; |
---|
61 | |
---|
62 | /* |
---|
63 | * If VRFY_INCOMPLETE is not set, then we didn't come through |
---|
64 | * __db_vrfy_pagezero and didn't incompletely |
---|
65 | * check this page--we haven't checked it at all. |
---|
66 | * Thus we need to call __db_vrfy_meta and check the common fields. |
---|
67 | * |
---|
68 | * If VRFY_INCOMPLETE is set, we've already done all the same work |
---|
69 | * in __db_vrfy_pagezero, so skip the check. |
---|
70 | */ |
---|
71 | if (!F_ISSET(pip, VRFY_INCOMPLETE) && |
---|
72 | (ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) { |
---|
73 | if (ret == DB_VERIFY_BAD) |
---|
74 | isbad = 1; |
---|
75 | else |
---|
76 | goto err; |
---|
77 | } |
---|
78 | |
---|
79 | /* bt_minkey: must be >= 2, must produce sensible ovflsize */ |
---|
80 | ovflsize = meta->minkey > 0 ? B_MINKEY_TO_OVFLSIZE(meta->minkey, |
---|
81 | dbp->pgsize) : 0; /* avoid division by zero */ |
---|
82 | if (meta->minkey < 2 || |
---|
83 | ovflsize > B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) { |
---|
84 | pip->bt_minkey = 0; |
---|
85 | isbad = 1; |
---|
86 | EPRINT((dbp->dbenv, |
---|
87 | "Nonsensical bt_minkey value %lu on metadata page %lu", |
---|
88 | meta->minkey, pgno)); |
---|
89 | } else |
---|
90 | pip->bt_minkey = meta->minkey; |
---|
91 | |
---|
92 | /* bt_maxkey: no constraints (XXX: right?) */ |
---|
93 | pip->bt_maxkey = meta->maxkey; |
---|
94 | |
---|
95 | /* re_len: no constraints on this (may be zero or huge--we make rope) */ |
---|
96 | pip->re_len = meta->re_len; |
---|
97 | |
---|
98 | /* |
---|
99 | * The root must not be current page or 0 and it must be within |
---|
100 | * database. If this metadata page is the master meta data page |
---|
101 | * of the file, then the root page had better be page 1. |
---|
102 | */ |
---|
103 | pip->root = 0; |
---|
104 | if (meta->root == PGNO_INVALID |
---|
105 | || meta->root == pgno || !IS_VALID_PGNO(meta->root) || |
---|
106 | (pgno == PGNO_BASE_MD && meta->root != 1)) { |
---|
107 | isbad = 1; |
---|
108 | EPRINT((dbp->dbenv, |
---|
109 | "Nonsensical root page %lu on metadata page %lu", |
---|
110 | meta->root, vdp->last_pgno)); |
---|
111 | } else |
---|
112 | pip->root = meta->root; |
---|
113 | |
---|
114 | /* Flags. */ |
---|
115 | if (F_ISSET(&meta->dbmeta, BTM_RENUMBER)) |
---|
116 | F_SET(pip, VRFY_IS_RRECNO); |
---|
117 | |
---|
118 | if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) { |
---|
119 | /* |
---|
120 | * If this is a master db meta page, it had better not have |
---|
121 | * duplicates. |
---|
122 | */ |
---|
123 | if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) { |
---|
124 | isbad = 1; |
---|
125 | EPRINT((dbp->dbenv, |
---|
126 | "Btree metadata page %lu has both duplicates and multiple databases", |
---|
127 | pgno)); |
---|
128 | } |
---|
129 | F_SET(pip, VRFY_HAS_SUBDBS); |
---|
130 | } |
---|
131 | |
---|
132 | if (F_ISSET(&meta->dbmeta, BTM_DUP)) |
---|
133 | F_SET(pip, VRFY_HAS_DUPS); |
---|
134 | if (F_ISSET(&meta->dbmeta, BTM_DUPSORT)) |
---|
135 | F_SET(pip, VRFY_HAS_DUPSORT); |
---|
136 | if (F_ISSET(&meta->dbmeta, BTM_RECNUM)) |
---|
137 | F_SET(pip, VRFY_HAS_RECNUMS); |
---|
138 | if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
139 | EPRINT((dbp->dbenv, |
---|
140 | "Btree metadata page %lu illegally has both recnums and dups", |
---|
141 | pgno)); |
---|
142 | isbad = 1; |
---|
143 | } |
---|
144 | |
---|
145 | if (F_ISSET(&meta->dbmeta, BTM_RECNO)) { |
---|
146 | F_SET(pip, VRFY_IS_RECNO); |
---|
147 | dbp->type = DB_RECNO; |
---|
148 | } else if (F_ISSET(pip, VRFY_IS_RRECNO)) { |
---|
149 | isbad = 1; |
---|
150 | EPRINT((dbp->dbenv, |
---|
151 | "Metadata page %lu has renumber flag set but is not recno", |
---|
152 | pgno)); |
---|
153 | } |
---|
154 | |
---|
155 | if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
156 | EPRINT((dbp->dbenv, |
---|
157 | "Recno metadata page %lu specifies duplicates", pgno)); |
---|
158 | isbad = 1; |
---|
159 | } |
---|
160 | |
---|
161 | if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN)) |
---|
162 | F_SET(pip, VRFY_IS_FIXEDLEN); |
---|
163 | else if (pip->re_len > 0) { |
---|
164 | /* |
---|
165 | * It's wrong to have an re_len if it's not a fixed-length |
---|
166 | * database |
---|
167 | */ |
---|
168 | isbad = 1; |
---|
169 | EPRINT((dbp->dbenv, |
---|
170 | "re_len of %lu in non-fixed-length database", |
---|
171 | pip->re_len)); |
---|
172 | } |
---|
173 | |
---|
174 | /* |
---|
175 | * We do not check that the rest of the page is 0, because it may |
---|
176 | * not be and may still be correct. |
---|
177 | */ |
---|
178 | |
---|
179 | err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) |
---|
180 | ret = t_ret; |
---|
181 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
182 | } |
---|
183 | |
---|
184 | /* |
---|
185 | * __ram_vrfy_leaf -- |
---|
186 | * Verify a recno leaf page. |
---|
187 | * |
---|
188 | * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
189 | * PUBLIC: u_int32_t)); |
---|
190 | */ |
---|
191 | int |
---|
192 | __ram_vrfy_leaf(dbp, vdp, h, pgno, flags) |
---|
193 | DB *dbp; |
---|
194 | VRFY_DBINFO *vdp; |
---|
195 | PAGE *h; |
---|
196 | db_pgno_t pgno; |
---|
197 | u_int32_t flags; |
---|
198 | { |
---|
199 | BKEYDATA *bk; |
---|
200 | VRFY_PAGEINFO *pip; |
---|
201 | db_indx_t i; |
---|
202 | int ret, t_ret, isbad; |
---|
203 | u_int32_t re_len_guess, len; |
---|
204 | |
---|
205 | isbad = 0; |
---|
206 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
207 | return (ret); |
---|
208 | |
---|
209 | if ((ret = __db_fchk(dbp->dbenv, |
---|
210 | "__ram_vrfy_leaf", flags, OKFLAGS)) != 0) |
---|
211 | goto err; |
---|
212 | |
---|
213 | if (TYPE(h) != P_LRECNO) { |
---|
214 | /* We should not have been called. */ |
---|
215 | TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_leaf", pgno, TYPE(h)); |
---|
216 | DB_ASSERT(0); |
---|
217 | ret = EINVAL; |
---|
218 | goto err; |
---|
219 | } |
---|
220 | |
---|
221 | /* |
---|
222 | * Verify (and, if relevant, save off) page fields common to |
---|
223 | * all PAGEs. |
---|
224 | */ |
---|
225 | if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { |
---|
226 | if (ret == DB_VERIFY_BAD) |
---|
227 | isbad = 1; |
---|
228 | else |
---|
229 | goto err; |
---|
230 | } |
---|
231 | |
---|
232 | /* |
---|
233 | * Verify inp[]. Return immediately if it returns DB_VERIFY_BAD; |
---|
234 | * further checks are dangerous. |
---|
235 | */ |
---|
236 | if ((ret = __bam_vrfy_inp(dbp, |
---|
237 | vdp, h, pgno, &pip->entries, flags)) != 0) |
---|
238 | goto err; |
---|
239 | |
---|
240 | if (F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
241 | EPRINT((dbp->dbenv, |
---|
242 | "Recno database has dups on page %lu", pgno)); |
---|
243 | ret = DB_VERIFY_BAD; |
---|
244 | goto err; |
---|
245 | } |
---|
246 | |
---|
247 | /* |
---|
248 | * Walk through inp and see if the lengths of all the records are the |
---|
249 | * same--if so, this may be a fixed-length database, and we want to |
---|
250 | * save off this value. We know inp to be safe if we've gotten this |
---|
251 | * far. |
---|
252 | */ |
---|
253 | re_len_guess = 0; |
---|
254 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
255 | bk = GET_BKEYDATA(h, i); |
---|
256 | /* KEYEMPTY. Go on. */ |
---|
257 | if (B_DISSET(bk->type)) |
---|
258 | continue; |
---|
259 | if (bk->type == B_OVERFLOW) |
---|
260 | len = ((BOVERFLOW *)bk)->tlen; |
---|
261 | else if (bk->type == B_KEYDATA) |
---|
262 | len = bk->len; |
---|
263 | else { |
---|
264 | isbad = 1; |
---|
265 | EPRINT((dbp->dbenv, "Nonsensical type for item %lu, page %lu", |
---|
266 | i, pgno)); |
---|
267 | continue; |
---|
268 | } |
---|
269 | if (re_len_guess == 0) |
---|
270 | re_len_guess = len; |
---|
271 | |
---|
272 | /* |
---|
273 | * Is this item's len the same as the last one's? If not, |
---|
274 | * reset to 0 and break--we don't have a single re_len. |
---|
275 | * Otherwise, go on to the next item. |
---|
276 | */ |
---|
277 | if (re_len_guess != len) { |
---|
278 | re_len_guess = 0; |
---|
279 | break; |
---|
280 | } |
---|
281 | } |
---|
282 | pip->re_len = re_len_guess; |
---|
283 | |
---|
284 | /* Save off record count. */ |
---|
285 | pip->rec_cnt = NUM_ENT(h); |
---|
286 | |
---|
287 | err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) |
---|
288 | ret = t_ret; |
---|
289 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0); |
---|
290 | } |
---|
291 | |
---|
292 | /* |
---|
293 | * __bam_vrfy -- |
---|
294 | * Verify a btree leaf or internal page. |
---|
295 | * |
---|
296 | * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, |
---|
297 | * PUBLIC: u_int32_t)); |
---|
298 | */ |
---|
299 | int |
---|
300 | __bam_vrfy(dbp, vdp, h, pgno, flags) |
---|
301 | DB *dbp; |
---|
302 | VRFY_DBINFO *vdp; |
---|
303 | PAGE *h; |
---|
304 | db_pgno_t pgno; |
---|
305 | u_int32_t flags; |
---|
306 | { |
---|
307 | VRFY_PAGEINFO *pip; |
---|
308 | int ret, t_ret, isbad; |
---|
309 | |
---|
310 | isbad = 0; |
---|
311 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
312 | return (ret); |
---|
313 | |
---|
314 | switch (TYPE(h)) { |
---|
315 | case P_IBTREE: |
---|
316 | case P_IRECNO: |
---|
317 | case P_LBTREE: |
---|
318 | case P_LDUP: |
---|
319 | break; |
---|
320 | default: |
---|
321 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy", pgno, TYPE(h)); |
---|
322 | DB_ASSERT(0); |
---|
323 | ret = EINVAL; |
---|
324 | goto err; |
---|
325 | } |
---|
326 | |
---|
327 | /* |
---|
328 | * Verify (and, if relevant, save off) page fields common to |
---|
329 | * all PAGEs. |
---|
330 | */ |
---|
331 | if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { |
---|
332 | if (ret == DB_VERIFY_BAD) |
---|
333 | isbad = 1; |
---|
334 | else |
---|
335 | goto err; |
---|
336 | } |
---|
337 | |
---|
338 | /* |
---|
339 | * The record count is, on internal pages, stored in an overloaded |
---|
340 | * next_pgno field. Save it off; we'll verify it when we check |
---|
341 | * overall database structure. We could overload the field |
---|
342 | * in VRFY_PAGEINFO, too, but this seems gross, and space |
---|
343 | * is not at such a premium. |
---|
344 | */ |
---|
345 | pip->rec_cnt = RE_NREC(h); |
---|
346 | |
---|
347 | /* |
---|
348 | * Verify inp[]. |
---|
349 | */ |
---|
350 | if (TYPE(h) == P_IRECNO) { |
---|
351 | if ((ret = __ram_vrfy_inp(dbp, |
---|
352 | vdp, h, pgno, &pip->entries, flags)) != 0) |
---|
353 | goto err; |
---|
354 | } else if ((ret = __bam_vrfy_inp(dbp, |
---|
355 | vdp, h, pgno, &pip->entries, flags)) != 0) { |
---|
356 | if (ret == DB_VERIFY_BAD) |
---|
357 | isbad = 1; |
---|
358 | else |
---|
359 | goto err; |
---|
360 | EPRINT((dbp->dbenv, |
---|
361 | "item order check on page %lu unsafe: skipping", pgno)); |
---|
362 | } else if (!LF_ISSET(DB_NOORDERCHK) && (ret = |
---|
363 | __bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) { |
---|
364 | /* |
---|
365 | * We know that the elements of inp are reasonable. |
---|
366 | * |
---|
367 | * Check that elements fall in the proper order. |
---|
368 | */ |
---|
369 | if (ret == DB_VERIFY_BAD) |
---|
370 | isbad = 1; |
---|
371 | else |
---|
372 | goto err; |
---|
373 | } |
---|
374 | |
---|
375 | err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) |
---|
376 | ret = t_ret; |
---|
377 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0); |
---|
378 | } |
---|
379 | |
---|
380 | /* |
---|
381 | * __ram_vrfy_inp -- |
---|
382 | * Verify that all entries in a P_IRECNO inp[] array are reasonable, |
---|
383 | * and count them. Note that P_LRECNO uses __bam_vrfy_inp; |
---|
384 | * P_IRECNOs are a special, and simpler, case, since they have |
---|
385 | * RINTERNALs rather than BKEYDATA/BINTERNALs. |
---|
386 | */ |
---|
387 | static int |
---|
388 | __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) |
---|
389 | DB *dbp; |
---|
390 | VRFY_DBINFO *vdp; |
---|
391 | PAGE *h; |
---|
392 | db_pgno_t pgno; |
---|
393 | db_indx_t *nentriesp; |
---|
394 | u_int32_t flags; |
---|
395 | { |
---|
396 | RINTERNAL *ri; |
---|
397 | VRFY_CHILDINFO child; |
---|
398 | VRFY_PAGEINFO *pip; |
---|
399 | int ret, t_ret, isbad; |
---|
400 | u_int32_t himark, i, offset, nentries; |
---|
401 | u_int8_t *pagelayout, *p; |
---|
402 | |
---|
403 | isbad = 0; |
---|
404 | memset(&child, 0, sizeof(VRFY_CHILDINFO)); |
---|
405 | nentries = 0; |
---|
406 | pagelayout = NULL; |
---|
407 | |
---|
408 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
409 | return (ret); |
---|
410 | |
---|
411 | if (TYPE(h) != P_IRECNO) { |
---|
412 | TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_inp", pgno, TYPE(h)); |
---|
413 | DB_ASSERT(0); |
---|
414 | ret = EINVAL; |
---|
415 | goto err; |
---|
416 | } |
---|
417 | |
---|
418 | himark = dbp->pgsize; |
---|
419 | if ((ret = |
---|
420 | __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pagelayout)) != 0) |
---|
421 | goto err; |
---|
422 | memset(pagelayout, 0, dbp->pgsize); |
---|
423 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
424 | if ((u_int8_t *)h->inp + i >= (u_int8_t *)h + himark) { |
---|
425 | EPRINT((dbp->dbenv, |
---|
426 | "Page %lu entries listing %lu overlaps data", |
---|
427 | pgno, i)); |
---|
428 | ret = DB_VERIFY_BAD; |
---|
429 | goto err; |
---|
430 | } |
---|
431 | offset = h->inp[i]; |
---|
432 | /* |
---|
433 | * Check that the item offset is reasonable: it points |
---|
434 | * somewhere after the inp array and before the end of the |
---|
435 | * page. |
---|
436 | */ |
---|
437 | if (offset <= (u_int32_t)((u_int8_t *)h->inp + i - |
---|
438 | (u_int8_t *)h) || |
---|
439 | offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) { |
---|
440 | isbad = 1; |
---|
441 | EPRINT((dbp->dbenv, |
---|
442 | "Bad offset %lu at page %lu index %lu", |
---|
443 | offset, pgno, i)); |
---|
444 | continue; |
---|
445 | } |
---|
446 | |
---|
447 | /* Update the high-water mark (what HOFFSET should be) */ |
---|
448 | if (offset < himark) |
---|
449 | himark = offset; |
---|
450 | |
---|
451 | nentries++; |
---|
452 | |
---|
453 | /* Make sure this RINTERNAL is not multiply referenced. */ |
---|
454 | ri = GET_RINTERNAL(h, i); |
---|
455 | if (pagelayout[offset] == 0) { |
---|
456 | pagelayout[offset] = 1; |
---|
457 | child.pgno = ri->pgno; |
---|
458 | child.type = V_RECNO; |
---|
459 | child.nrecs = ri->nrecs; |
---|
460 | if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0) |
---|
461 | goto err; |
---|
462 | } else { |
---|
463 | EPRINT((dbp->dbenv, |
---|
464 | "RINTERNAL structure at offset %lu, page %lu referenced twice", |
---|
465 | offset, pgno)); |
---|
466 | isbad = 1; |
---|
467 | } |
---|
468 | } |
---|
469 | |
---|
470 | for (p = pagelayout + himark; |
---|
471 | p < pagelayout + dbp->pgsize; |
---|
472 | p += RINTERNAL_SIZE) |
---|
473 | if (*p != 1) { |
---|
474 | EPRINT((dbp->dbenv, |
---|
475 | "Gap between items at offset %lu, page %lu", |
---|
476 | p - pagelayout, pgno)); |
---|
477 | isbad = 1; |
---|
478 | } |
---|
479 | |
---|
480 | if ((db_indx_t)himark != HOFFSET(h)) { |
---|
481 | EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu", |
---|
482 | HOFFSET(h), himark)); |
---|
483 | isbad = 1; |
---|
484 | } |
---|
485 | |
---|
486 | *nentriesp = nentries; |
---|
487 | |
---|
488 | err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) |
---|
489 | ret = t_ret; |
---|
490 | if (pagelayout != NULL) |
---|
491 | __os_free(pagelayout, dbp->pgsize); |
---|
492 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
493 | } |
---|
494 | |
---|
495 | /* |
---|
496 | * __bam_vrfy_inp -- |
---|
497 | * Verify that all entries in inp[] array are reasonable; |
---|
498 | * count them. |
---|
499 | */ |
---|
500 | static int |
---|
501 | __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags) |
---|
502 | DB *dbp; |
---|
503 | VRFY_DBINFO *vdp; |
---|
504 | PAGE *h; |
---|
505 | db_pgno_t pgno; |
---|
506 | db_indx_t *nentriesp; |
---|
507 | u_int32_t flags; |
---|
508 | { |
---|
509 | BKEYDATA *bk; |
---|
510 | BOVERFLOW *bo; |
---|
511 | VRFY_CHILDINFO child; |
---|
512 | VRFY_PAGEINFO *pip; |
---|
513 | int isbad, initem, isdupitem, ret, t_ret; |
---|
514 | u_int32_t himark, offset; /* These would be db_indx_ts but for algnmt.*/ |
---|
515 | u_int32_t i, endoff, nentries; |
---|
516 | u_int8_t *pagelayout; |
---|
517 | |
---|
518 | isbad = isdupitem = 0; |
---|
519 | nentries = 0; |
---|
520 | memset(&child, 0, sizeof(VRFY_CHILDINFO)); |
---|
521 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
522 | return (ret); |
---|
523 | |
---|
524 | switch (TYPE(h)) { |
---|
525 | case P_IBTREE: |
---|
526 | case P_LBTREE: |
---|
527 | case P_LDUP: |
---|
528 | case P_LRECNO: |
---|
529 | break; |
---|
530 | default: |
---|
531 | /* |
---|
532 | * In the salvager, we might call this from a page which |
---|
533 | * we merely suspect is a btree page. Otherwise, it |
---|
534 | * shouldn't get called--if it is, that's a verifier bug. |
---|
535 | */ |
---|
536 | if (LF_ISSET(DB_SALVAGE)) |
---|
537 | break; |
---|
538 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_inp", pgno, TYPE(h)); |
---|
539 | DB_ASSERT(0); |
---|
540 | ret = EINVAL; |
---|
541 | goto err; |
---|
542 | } |
---|
543 | |
---|
544 | /* |
---|
545 | * Loop through inp[], the array of items, until we either |
---|
546 | * run out of entries or collide with the data. Keep track |
---|
547 | * of h_offset in himark. |
---|
548 | * |
---|
549 | * For each element in inp[i], make sure it references a region |
---|
550 | * that starts after the end of the inp array (as defined by |
---|
551 | * NUM_ENT(h)), ends before the beginning of the page, doesn't |
---|
552 | * overlap any other regions, and doesn't have a gap between |
---|
553 | * it and the region immediately after it. |
---|
554 | */ |
---|
555 | himark = dbp->pgsize; |
---|
556 | if ((ret = __os_malloc(dbp->dbenv, |
---|
557 | dbp->pgsize, NULL, &pagelayout)) != 0) |
---|
558 | goto err; |
---|
559 | memset(pagelayout, 0, dbp->pgsize); |
---|
560 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
561 | |
---|
562 | ret = __db_vrfy_inpitem(dbp, |
---|
563 | h, pgno, i, 1, flags, &himark, &offset); |
---|
564 | if (ret == DB_VERIFY_BAD) { |
---|
565 | isbad = 1; |
---|
566 | continue; |
---|
567 | } else if (ret == DB_VERIFY_FATAL) { |
---|
568 | isbad = 1; |
---|
569 | goto err; |
---|
570 | } else if (ret != 0) |
---|
571 | DB_ASSERT(0); |
---|
572 | |
---|
573 | /* |
---|
574 | * We now have a plausible beginning for the item, and we know |
---|
575 | * its length is safe. |
---|
576 | * |
---|
577 | * Mark the beginning and end in pagelayout so we can make sure |
---|
578 | * items have no overlaps or gaps. |
---|
579 | */ |
---|
580 | bk = GET_BKEYDATA(h, i); |
---|
581 | #define ITEM_BEGIN 1 |
---|
582 | #define ITEM_END 2 |
---|
583 | if (pagelayout[offset] == 0) |
---|
584 | pagelayout[offset] = ITEM_BEGIN; |
---|
585 | else if (pagelayout[offset] == ITEM_BEGIN) { |
---|
586 | /* |
---|
587 | * Having two inp entries that point at the same patch |
---|
588 | * of page is legal if and only if the page is |
---|
589 | * a btree leaf and they're onpage duplicate keys-- |
---|
590 | * that is, if (i % P_INDX) == 0. |
---|
591 | */ |
---|
592 | if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) { |
---|
593 | /* Flag for later. */ |
---|
594 | F_SET(pip, VRFY_HAS_DUPS); |
---|
595 | |
---|
596 | /* Bump up nentries so we don't undercount. */ |
---|
597 | nentries++; |
---|
598 | |
---|
599 | /* |
---|
600 | * We'll check to make sure the end is |
---|
601 | * equal, too. |
---|
602 | */ |
---|
603 | isdupitem = 1; |
---|
604 | } else { |
---|
605 | isbad = 1; |
---|
606 | EPRINT((dbp->dbenv, "Duplicated item %lu on page %lu", |
---|
607 | i, pgno)); |
---|
608 | } |
---|
609 | } |
---|
610 | |
---|
611 | /* |
---|
612 | * Mark the end. Its location varies with the page type |
---|
613 | * and the item type. |
---|
614 | * |
---|
615 | * If the end already has a sign other than 0, do nothing-- |
---|
616 | * it's an overlap that we'll catch later. |
---|
617 | */ |
---|
618 | switch(B_TYPE(bk->type)) { |
---|
619 | case B_KEYDATA: |
---|
620 | if (TYPE(h) == P_IBTREE) |
---|
621 | /* It's a BINTERNAL. */ |
---|
622 | endoff = offset + BINTERNAL_SIZE(bk->len) - 1; |
---|
623 | else |
---|
624 | endoff = offset + BKEYDATA_SIZE(bk->len) - 1; |
---|
625 | break; |
---|
626 | case B_DUPLICATE: |
---|
627 | /* |
---|
628 | * Flag that we have dups; we'll check whether |
---|
629 | * that's okay during the structure check. |
---|
630 | */ |
---|
631 | F_SET(pip, VRFY_HAS_DUPS); |
---|
632 | /* FALLTHROUGH */ |
---|
633 | case B_OVERFLOW: |
---|
634 | /* |
---|
635 | * Overflow entries on internal pages are stored |
---|
636 | * as the _data_ of a BINTERNAL; overflow entries |
---|
637 | * on leaf pages are stored as the entire entry. |
---|
638 | */ |
---|
639 | endoff = offset + |
---|
640 | ((TYPE(h) == P_IBTREE) ? |
---|
641 | BINTERNAL_SIZE(BOVERFLOW_SIZE) : |
---|
642 | BOVERFLOW_SIZE) - 1; |
---|
643 | break; |
---|
644 | default: |
---|
645 | /* |
---|
646 | * We'll complain later; for now, just mark |
---|
647 | * a minimum. |
---|
648 | */ |
---|
649 | endoff = offset + BKEYDATA_SIZE(0) - 1; |
---|
650 | break; |
---|
651 | } |
---|
652 | |
---|
653 | /* |
---|
654 | * If this is an onpage duplicate key we've seen before, |
---|
655 | * the end had better coincide too. |
---|
656 | */ |
---|
657 | if (isdupitem && pagelayout[endoff] != ITEM_END) { |
---|
658 | EPRINT((dbp->dbenv, "Duplicated item %lu on page %lu", i, |
---|
659 | pgno)); |
---|
660 | isbad = 1; |
---|
661 | } else if (pagelayout[endoff] == 0) |
---|
662 | pagelayout[endoff] = ITEM_END; |
---|
663 | isdupitem = 0; |
---|
664 | |
---|
665 | /* |
---|
666 | * There should be no deleted items in a quiescent tree, |
---|
667 | * except in recno. |
---|
668 | */ |
---|
669 | if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) { |
---|
670 | isbad = 1; |
---|
671 | EPRINT((dbp->dbenv, |
---|
672 | "Item %lu on page %lu marked deleted", i, pgno)); |
---|
673 | } |
---|
674 | |
---|
675 | /* |
---|
676 | * Check the type and such of bk--make sure it's reasonable |
---|
677 | * for the pagetype. |
---|
678 | */ |
---|
679 | switch (B_TYPE(bk->type)) { |
---|
680 | case B_KEYDATA: |
---|
681 | /* |
---|
682 | * This is a normal, non-overflow BKEYDATA or BINTERNAL. |
---|
683 | * The only thing to check is the len, and that's |
---|
684 | * already been done. |
---|
685 | */ |
---|
686 | break; |
---|
687 | case B_DUPLICATE: |
---|
688 | if (TYPE(h) == P_IBTREE) { |
---|
689 | isbad = 1; |
---|
690 | EPRINT((dbp->dbenv, |
---|
691 | "Duplicate page referenced by internal btree page %lu at item %lu", |
---|
692 | pgno, i)); |
---|
693 | break; |
---|
694 | } else if (TYPE(h) == P_LRECNO) { |
---|
695 | isbad = 1; |
---|
696 | EPRINT((dbp->dbenv, |
---|
697 | "Duplicate page referenced by recno page %lu at item %lu", |
---|
698 | pgno, i)); |
---|
699 | break; |
---|
700 | } |
---|
701 | /* FALLTHROUGH */ |
---|
702 | case B_OVERFLOW: |
---|
703 | bo = (TYPE(h) == P_IBTREE) ? |
---|
704 | (BOVERFLOW *)(((BINTERNAL *)bk)->data) : |
---|
705 | (BOVERFLOW *)bk; |
---|
706 | |
---|
707 | if (B_TYPE(bk->type) == B_OVERFLOW) |
---|
708 | /* Make sure tlen is reasonable. */ |
---|
709 | if (bo->tlen > dbp->pgsize * vdp->last_pgno) { |
---|
710 | isbad = 1; |
---|
711 | EPRINT((dbp->dbenv, |
---|
712 | "Impossible tlen %lu, item %lu, page %lu", |
---|
713 | bo->tlen, i, pgno)); |
---|
714 | /* Don't save as a child. */ |
---|
715 | break; |
---|
716 | } |
---|
717 | |
---|
718 | if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno || |
---|
719 | bo->pgno == PGNO_INVALID) { |
---|
720 | isbad = 1; |
---|
721 | EPRINT((dbp->dbenv, |
---|
722 | "Offpage item %lu, page %lu has bad pgno", |
---|
723 | i, pgno)); |
---|
724 | /* Don't save as a child. */ |
---|
725 | break; |
---|
726 | } |
---|
727 | |
---|
728 | child.pgno = bo->pgno; |
---|
729 | child.type = (B_TYPE(bk->type) == B_OVERFLOW ? |
---|
730 | V_OVERFLOW : V_DUPLICATE); |
---|
731 | child.tlen = bo->tlen; |
---|
732 | if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0) |
---|
733 | goto err; |
---|
734 | break; |
---|
735 | default: |
---|
736 | isbad = 1; |
---|
737 | EPRINT((dbp->dbenv, |
---|
738 | "Item %lu on page %lu of invalid type %lu", |
---|
739 | i, pgno)); |
---|
740 | break; |
---|
741 | } |
---|
742 | } |
---|
743 | |
---|
744 | /* |
---|
745 | * Now, loop through and make sure the items are contiguous and |
---|
746 | * non-overlapping. |
---|
747 | */ |
---|
748 | initem = 0; |
---|
749 | for (i = himark; i < dbp->pgsize; i++) |
---|
750 | if (initem == 0) |
---|
751 | switch (pagelayout[i]) { |
---|
752 | case 0: |
---|
753 | /* May be just for alignment. */ |
---|
754 | if (i != ALIGN(i, 4)) |
---|
755 | continue; |
---|
756 | |
---|
757 | isbad = 1; |
---|
758 | EPRINT((dbp->dbenv, |
---|
759 | "Gap between items, page %lu offset %lu", |
---|
760 | pgno, i)); |
---|
761 | /* Find the end of the gap */ |
---|
762 | for ( ; pagelayout[i + 1] == 0 && |
---|
763 | (size_t)(i + 1) < dbp->pgsize; i++) |
---|
764 | ; |
---|
765 | break; |
---|
766 | case ITEM_BEGIN: |
---|
767 | /* We've found an item. Check its alignment. */ |
---|
768 | if (i != ALIGN(i, 4)) { |
---|
769 | isbad = 1; |
---|
770 | EPRINT((dbp->dbenv, |
---|
771 | "Offset %lu page %lu unaligned", |
---|
772 | i, pgno)); |
---|
773 | } |
---|
774 | initem = 1; |
---|
775 | nentries++; |
---|
776 | break; |
---|
777 | case ITEM_END: |
---|
778 | /* |
---|
779 | * We've hit the end of an item even though |
---|
780 | * we don't think we're in one; must |
---|
781 | * be an overlap. |
---|
782 | */ |
---|
783 | isbad = 1; |
---|
784 | EPRINT((dbp->dbenv, |
---|
785 | "Overlapping items, page %lu offset %lu", |
---|
786 | pgno, i)); |
---|
787 | break; |
---|
788 | default: |
---|
789 | /* Should be impossible. */ |
---|
790 | DB_ASSERT(0); |
---|
791 | ret = EINVAL; |
---|
792 | goto err; |
---|
793 | } |
---|
794 | else |
---|
795 | switch (pagelayout[i]) { |
---|
796 | case 0: |
---|
797 | /* In the middle of an item somewhere. Okay. */ |
---|
798 | break; |
---|
799 | case ITEM_END: |
---|
800 | /* End of an item; switch to out-of-item mode.*/ |
---|
801 | initem = 0; |
---|
802 | break; |
---|
803 | case ITEM_BEGIN: |
---|
804 | /* |
---|
805 | * Hit a second item beginning without an |
---|
806 | * end. Overlap. |
---|
807 | */ |
---|
808 | isbad = 1; |
---|
809 | EPRINT((dbp->dbenv, |
---|
810 | "Overlapping items, page %lu offset %lu", |
---|
811 | pgno, i)); |
---|
812 | break; |
---|
813 | } |
---|
814 | |
---|
815 | (void)__os_free(pagelayout, dbp->pgsize); |
---|
816 | |
---|
817 | /* Verify HOFFSET. */ |
---|
818 | if ((db_indx_t)himark != HOFFSET(h)) { |
---|
819 | EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu", |
---|
820 | HOFFSET(h), himark)); |
---|
821 | isbad = 1; |
---|
822 | } |
---|
823 | |
---|
824 | err: if (nentriesp != NULL) |
---|
825 | *nentriesp = nentries; |
---|
826 | |
---|
827 | if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) |
---|
828 | ret = t_ret; |
---|
829 | |
---|
830 | return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); |
---|
831 | } |
---|
832 | |
---|
833 | /* |
---|
834 | * __bam_vrfy_itemorder -- |
---|
835 | * Make sure the items on a page sort correctly. |
---|
836 | * |
---|
837 | * Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are |
---|
838 | * reasonable; be sure that __bam_vrfy_inp has been called first. |
---|
839 | * |
---|
840 | * If ovflok is set, it also assumes that overflow page chains |
---|
841 | * hanging off the current page have been sanity-checked, and so we |
---|
842 | * can use __bam_cmp to verify their ordering. If it is not set, |
---|
843 | * and we run into an overflow page, carp and return DB_VERIFY_BAD; |
---|
844 | * we shouldn't be called if any exist. |
---|
845 | * |
---|
846 | * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *, |
---|
847 | * PUBLIC: db_pgno_t, u_int32_t, int, int, u_int32_t)); |
---|
848 | */ |
---|
849 | int |
---|
850 | __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags) |
---|
851 | DB *dbp; |
---|
852 | VRFY_DBINFO *vdp; |
---|
853 | PAGE *h; |
---|
854 | db_pgno_t pgno; |
---|
855 | u_int32_t nentries; |
---|
856 | int ovflok, hasdups; |
---|
857 | u_int32_t flags; |
---|
858 | { |
---|
859 | DBT dbta, dbtb, dup1, dup2, *p1, *p2, *tmp; |
---|
860 | BTREE *bt; |
---|
861 | BINTERNAL *bi; |
---|
862 | BKEYDATA *bk; |
---|
863 | BOVERFLOW *bo; |
---|
864 | VRFY_PAGEINFO *pip; |
---|
865 | db_indx_t i; |
---|
866 | int cmp, freedup1, freedup2, isbad, ret, t_ret; |
---|
867 | int (*dupfunc) __P((const DBT *, const DBT *)); |
---|
868 | int (*func) __P((const DBT *, const DBT *)); |
---|
869 | void *buf1, *buf2, *tmpbuf; |
---|
870 | |
---|
871 | /* |
---|
872 | * We need to work in the ORDERCHKONLY environment where we might |
---|
873 | * not have a pip, but we also may need to work in contexts where |
---|
874 | * NUM_ENT isn't safe. |
---|
875 | */ |
---|
876 | if (vdp != NULL) { |
---|
877 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
878 | return (ret); |
---|
879 | nentries = pip->entries; |
---|
880 | } else |
---|
881 | pip = NULL; |
---|
882 | |
---|
883 | ret = isbad = 0; |
---|
884 | bo = NULL; /* Shut up compiler. */ |
---|
885 | |
---|
886 | memset(&dbta, 0, sizeof(DBT)); |
---|
887 | F_SET(&dbta, DB_DBT_REALLOC); |
---|
888 | |
---|
889 | memset(&dbtb, 0, sizeof(DBT)); |
---|
890 | F_SET(&dbtb, DB_DBT_REALLOC); |
---|
891 | |
---|
892 | buf1 = buf2 = NULL; |
---|
893 | |
---|
894 | DB_ASSERT(!LF_ISSET(DB_NOORDERCHK)); |
---|
895 | |
---|
896 | dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; |
---|
897 | if (TYPE(h) == P_LDUP) |
---|
898 | func = dupfunc; |
---|
899 | else { |
---|
900 | func = __bam_defcmp; |
---|
901 | if (dbp->bt_internal != NULL) { |
---|
902 | bt = (BTREE *)dbp->bt_internal; |
---|
903 | if (bt->bt_compare != NULL) |
---|
904 | func = bt->bt_compare; |
---|
905 | } |
---|
906 | } |
---|
907 | |
---|
908 | /* |
---|
909 | * We alternate our use of dbta and dbtb so that we can walk |
---|
910 | * through the page key-by-key without copying a dbt twice. |
---|
911 | * p1 is always the dbt for index i - 1, and p2 for index i. |
---|
912 | */ |
---|
913 | p1 = &dbta; |
---|
914 | p2 = &dbtb; |
---|
915 | |
---|
916 | /* |
---|
917 | * Loop through the entries. nentries ought to contain the |
---|
918 | * actual count, and so is a safe way to terminate the loop; whether |
---|
919 | * we inc. by one or two depends on whether we're a leaf page-- |
---|
920 | * on a leaf page, we care only about keys. On internal pages |
---|
921 | * and LDUP pages, we want to check the order of all entries. |
---|
922 | * |
---|
923 | * Note that on IBTREE pages, we start with item 1, since item |
---|
924 | * 0 doesn't get looked at by __bam_cmp. |
---|
925 | */ |
---|
926 | for (i = (TYPE(h) == P_IBTREE) ? 1 : 0; i < nentries; |
---|
927 | i += (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX) { |
---|
928 | /* |
---|
929 | * Put key i-1, now in p2, into p1, by swapping DBTs and bufs. |
---|
930 | */ |
---|
931 | tmp = p1; |
---|
932 | p1 = p2; |
---|
933 | p2 = tmp; |
---|
934 | tmpbuf = buf1; |
---|
935 | buf1 = buf2; |
---|
936 | buf2 = tmpbuf; |
---|
937 | |
---|
938 | /* |
---|
939 | * Get key i into p2. |
---|
940 | */ |
---|
941 | switch (TYPE(h)) { |
---|
942 | case P_IBTREE: |
---|
943 | bi = GET_BINTERNAL(h, i); |
---|
944 | if (B_TYPE(bi->type) == B_OVERFLOW) { |
---|
945 | bo = (BOVERFLOW *)(bi->data); |
---|
946 | goto overflow; |
---|
947 | } else { |
---|
948 | p2->data = bi->data; |
---|
949 | p2->size = bi->len; |
---|
950 | } |
---|
951 | |
---|
952 | /* |
---|
953 | * The leftmost key on an internal page must be |
---|
954 | * len 0, since it's just a placeholder and |
---|
955 | * automatically sorts less than all keys. |
---|
956 | * |
---|
957 | * XXX |
---|
958 | * This criterion does not currently hold! |
---|
959 | * See todo list item #1686. Meanwhile, it's harmless |
---|
960 | * to just not check for it. |
---|
961 | */ |
---|
962 | #if 0 |
---|
963 | if (i == 0 && bi->len != 0) { |
---|
964 | isbad = 1; |
---|
965 | EPRINT((dbp->dbenv, |
---|
966 | "Lowest key on internal page %lu of nonzero length", |
---|
967 | pgno)); |
---|
968 | } |
---|
969 | #endif |
---|
970 | break; |
---|
971 | case P_LBTREE: |
---|
972 | case P_LDUP: |
---|
973 | bk = GET_BKEYDATA(h, i); |
---|
974 | if (B_TYPE(bk->type) == B_OVERFLOW) { |
---|
975 | bo = (BOVERFLOW *)bk; |
---|
976 | goto overflow; |
---|
977 | } else { |
---|
978 | p2->data = bk->data; |
---|
979 | p2->size = bk->len; |
---|
980 | } |
---|
981 | break; |
---|
982 | default: |
---|
983 | /* |
---|
984 | * This means our caller screwed up and sent us |
---|
985 | * an inappropriate page. |
---|
986 | */ |
---|
987 | TYPE_ERR_PRINT(dbp->dbenv, |
---|
988 | "__bam_vrfy_itemorder", pgno, TYPE(h)) |
---|
989 | DB_ASSERT(0); |
---|
990 | ret = EINVAL; |
---|
991 | goto err; |
---|
992 | /* NOTREACHED */ |
---|
993 | } |
---|
994 | |
---|
995 | if (0) { |
---|
996 | /* |
---|
997 | * If ovflok != 1, we can't safely go chasing |
---|
998 | * overflow pages with the normal routines now; |
---|
999 | * they might be unsafe or nonexistent. Mark this |
---|
1000 | * page as incomplete and return. |
---|
1001 | * |
---|
1002 | * Note that we don't need to worry about freeing |
---|
1003 | * buffers, since they can't have been allocated |
---|
1004 | * if overflow items are unsafe. |
---|
1005 | */ |
---|
1006 | overflow: if (!ovflok) { |
---|
1007 | F_SET(pip, VRFY_INCOMPLETE); |
---|
1008 | goto err; |
---|
1009 | } |
---|
1010 | |
---|
1011 | /* |
---|
1012 | * Overflow items are safe to chase. Do so. |
---|
1013 | * Fetch the overflow item into p2->data, |
---|
1014 | * NULLing it or reallocing it as appropriate. |
---|
1015 | * |
---|
1016 | * (We set p2->data to buf2 before the call |
---|
1017 | * so we're sure to realloc if we can and if p2 |
---|
1018 | * was just pointing at a non-overflow item.) |
---|
1019 | */ |
---|
1020 | p2->data = buf2; |
---|
1021 | if ((ret = __db_goff(dbp, |
---|
1022 | p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) { |
---|
1023 | isbad = 1; |
---|
1024 | EPRINT((dbp->dbenv, |
---|
1025 | "Error %lu in fetching overflow item %lu, page %lu", |
---|
1026 | ret, i, pgno)); |
---|
1027 | } |
---|
1028 | /* In case it got realloc'ed and thus changed. */ |
---|
1029 | buf2 = p2->data; |
---|
1030 | } |
---|
1031 | |
---|
1032 | /* Compare with the last key. */ |
---|
1033 | if (p1->data != NULL && p2->data != NULL) { |
---|
1034 | cmp = func(p1, p2); |
---|
1035 | |
---|
1036 | /* comparison succeeded */ |
---|
1037 | if (cmp > 0) { |
---|
1038 | isbad = 1; |
---|
1039 | EPRINT((dbp->dbenv, |
---|
1040 | "Out-of-order key, page %lu item %lu", |
---|
1041 | pgno, i)); |
---|
1042 | /* proceed */ |
---|
1043 | } else if (cmp == 0) { |
---|
1044 | /* |
---|
1045 | * If they compared equally, this |
---|
1046 | * had better be a (sub)database with dups. |
---|
1047 | * Mark it so we can check during the |
---|
1048 | * structure check. |
---|
1049 | */ |
---|
1050 | if (pip != NULL) |
---|
1051 | F_SET(pip, VRFY_HAS_DUPS); |
---|
1052 | else if (hasdups == 0) { |
---|
1053 | isbad = 1; |
---|
1054 | EPRINT((dbp->dbenv, |
---|
1055 | "Database with no duplicates has duplicated keys on page %lu", pgno)); |
---|
1056 | } |
---|
1057 | |
---|
1058 | /* |
---|
1059 | * If we're a btree leaf, check to see |
---|
1060 | * if the data items of these on-page dups are |
---|
1061 | * in sorted order. If not, flag this, so |
---|
1062 | * that we can make sure during the |
---|
1063 | * structure checks that the DUPSORT flag |
---|
1064 | * is unset. |
---|
1065 | * |
---|
1066 | * At this point i points to a duplicate key. |
---|
1067 | * Compare the datum before it (same key) |
---|
1068 | * to the datum after it, i.e. i-1 to i+1. |
---|
1069 | */ |
---|
1070 | if (TYPE(h) == P_LBTREE) { |
---|
1071 | /* |
---|
1072 | * Unsafe; continue and we'll pick |
---|
1073 | * up the bogus nentries later. |
---|
1074 | */ |
---|
1075 | if (i + 1 >= (db_indx_t)nentries) |
---|
1076 | continue; |
---|
1077 | |
---|
1078 | /* |
---|
1079 | * We don't bother with clever memory |
---|
1080 | * management with on-page dups, |
---|
1081 | * as it's only really a big win |
---|
1082 | * in the overflow case, and overflow |
---|
1083 | * dups are probably (?) rare. |
---|
1084 | */ |
---|
1085 | if (((ret = __bam_safe_getdata(dbp, |
---|
1086 | h, i - 1, ovflok, &dup1, |
---|
1087 | &freedup1)) != 0) || |
---|
1088 | ((ret = __bam_safe_getdata(dbp, |
---|
1089 | h, i + 1, ovflok, &dup2, |
---|
1090 | &freedup2)) != 0)) |
---|
1091 | goto err; |
---|
1092 | |
---|
1093 | /* |
---|
1094 | * If either of the data are NULL, |
---|
1095 | * it's because they're overflows and |
---|
1096 | * it's not safe to chase them now. |
---|
1097 | * Mark an incomplete and return. |
---|
1098 | */ |
---|
1099 | if (dup1.data == NULL || |
---|
1100 | dup2.data == NULL) { |
---|
1101 | DB_ASSERT(!ovflok); |
---|
1102 | F_SET(pip, VRFY_INCOMPLETE); |
---|
1103 | goto err; |
---|
1104 | } |
---|
1105 | |
---|
1106 | /* |
---|
1107 | * If the dups are out of order, |
---|
1108 | * flag this. It's not an error |
---|
1109 | * until we do the structure check |
---|
1110 | * and see whether DUPSORT is set. |
---|
1111 | */ |
---|
1112 | if (dupfunc(&dup1, &dup2) > 0) |
---|
1113 | F_SET(pip, VRFY_DUPS_UNSORTED); |
---|
1114 | |
---|
1115 | if (freedup1) |
---|
1116 | __os_free(dup1.data, 0); |
---|
1117 | if (freedup2) |
---|
1118 | __os_free(dup2.data, 0); |
---|
1119 | } |
---|
1120 | } |
---|
1121 | } |
---|
1122 | } |
---|
1123 | |
---|
1124 | err: if (pip != NULL && |
---|
1125 | ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) && ret == 0) |
---|
1126 | ret = t_ret; |
---|
1127 | |
---|
1128 | if (buf1 != NULL) |
---|
1129 | __os_free(buf1, 0); |
---|
1130 | if (buf2 != NULL) |
---|
1131 | __os_free(buf2, 0); |
---|
1132 | |
---|
1133 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
1134 | } |
---|
1135 | |
---|
1136 | /* |
---|
1137 | * __bam_vrfy_structure -- |
---|
1138 | * Verify the tree structure of a btree database (including the master |
---|
1139 | * database containing subdbs). |
---|
1140 | * |
---|
1141 | * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, |
---|
1142 | * PUBLIC: u_int32_t)); |
---|
1143 | */ |
---|
1144 | int |
---|
1145 | __bam_vrfy_structure(dbp, vdp, meta_pgno, flags) |
---|
1146 | DB *dbp; |
---|
1147 | VRFY_DBINFO *vdp; |
---|
1148 | db_pgno_t meta_pgno; |
---|
1149 | u_int32_t flags; |
---|
1150 | { |
---|
1151 | DB *pgset; |
---|
1152 | VRFY_PAGEINFO *mip, *rip; |
---|
1153 | db_pgno_t root, p; |
---|
1154 | int t_ret, ret; |
---|
1155 | u_int32_t nrecs, level, relen, stflags; |
---|
1156 | |
---|
1157 | mip = rip = 0; |
---|
1158 | pgset = vdp->pgset; |
---|
1159 | |
---|
1160 | if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0) |
---|
1161 | return (ret); |
---|
1162 | |
---|
1163 | if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, (int *)&p)) != 0) |
---|
1164 | goto err; |
---|
1165 | if (p != 0) { |
---|
1166 | EPRINT((dbp->dbenv, |
---|
1167 | "Btree metadata page number %lu observed twice", |
---|
1168 | meta_pgno)); |
---|
1169 | ret = DB_VERIFY_BAD; |
---|
1170 | goto err; |
---|
1171 | } |
---|
1172 | if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0) |
---|
1173 | goto err; |
---|
1174 | |
---|
1175 | root = mip->root; |
---|
1176 | |
---|
1177 | if (root == 0) { |
---|
1178 | EPRINT((dbp->dbenv, |
---|
1179 | "Btree metadata page %lu has no root", meta_pgno)); |
---|
1180 | ret = DB_VERIFY_BAD; |
---|
1181 | goto err; |
---|
1182 | } |
---|
1183 | |
---|
1184 | if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0) |
---|
1185 | goto err; |
---|
1186 | |
---|
1187 | switch (rip->type) { |
---|
1188 | case P_IBTREE: |
---|
1189 | case P_LBTREE: |
---|
1190 | stflags = flags | ST_TOPLEVEL; |
---|
1191 | if (F_ISSET(mip, VRFY_HAS_DUPS)) |
---|
1192 | stflags |= ST_DUPOK; |
---|
1193 | if (F_ISSET(mip, VRFY_HAS_DUPSORT)) |
---|
1194 | stflags |= ST_DUPSORT; |
---|
1195 | if (F_ISSET(mip, VRFY_HAS_RECNUMS)) |
---|
1196 | stflags |= ST_RECNUM; |
---|
1197 | ret = __bam_vrfy_subtree(dbp, |
---|
1198 | vdp, root, NULL, NULL, stflags, NULL, NULL, NULL); |
---|
1199 | break; |
---|
1200 | case P_IRECNO: |
---|
1201 | case P_LRECNO: |
---|
1202 | stflags = flags | ST_RECNUM | ST_IS_RECNO | ST_TOPLEVEL; |
---|
1203 | if (mip->re_len > 0) |
---|
1204 | stflags |= ST_RELEN; |
---|
1205 | if ((ret = __bam_vrfy_subtree(dbp, vdp, |
---|
1206 | root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0) |
---|
1207 | goto err; |
---|
1208 | /* |
---|
1209 | * Even if mip->re_len > 0, re_len may come back zero if the |
---|
1210 | * tree is empty. It should be okay to just skip the check in |
---|
1211 | * this case, as if there are any non-deleted keys at all, |
---|
1212 | * that should never happen. |
---|
1213 | */ |
---|
1214 | if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) { |
---|
1215 | EPRINT((dbp->dbenv, |
---|
1216 | "Recno database with meta page %lu has bad re_len %lu", |
---|
1217 | meta_pgno, relen)); |
---|
1218 | ret = DB_VERIFY_BAD; |
---|
1219 | goto err; |
---|
1220 | } |
---|
1221 | ret = 0; |
---|
1222 | break; |
---|
1223 | case P_LDUP: |
---|
1224 | EPRINT((dbp->dbenv, |
---|
1225 | "Duplicate tree referenced from metadata page %lu", |
---|
1226 | meta_pgno)); |
---|
1227 | ret = DB_VERIFY_BAD; |
---|
1228 | break; |
---|
1229 | default: |
---|
1230 | EPRINT((dbp->dbenv, "%s%s", "Btree root of incorrect type ", |
---|
1231 | "%lu specified on meta page %lu", rip->type, meta_pgno)); |
---|
1232 | ret = DB_VERIFY_BAD; |
---|
1233 | break; |
---|
1234 | } |
---|
1235 | |
---|
1236 | err: if (mip != NULL && |
---|
1237 | ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) && ret == 0) |
---|
1238 | t_ret = ret; |
---|
1239 | if (rip != NULL && |
---|
1240 | ((t_ret = __db_vrfy_putpageinfo(vdp, rip)) != 0) && ret == 0) |
---|
1241 | t_ret = ret; |
---|
1242 | return (ret); |
---|
1243 | } |
---|
1244 | |
---|
1245 | /* |
---|
1246 | * __bam_vrfy_subtree-- |
---|
1247 | * Verify a subtree (or entire) btree with specified root. |
---|
1248 | * |
---|
1249 | * Note that this is public because it must be called to verify |
---|
1250 | * offpage dup trees, including from hash. |
---|
1251 | * |
---|
1252 | * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, |
---|
1253 | * PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); |
---|
1254 | */ |
---|
1255 | int |
---|
1256 | __bam_vrfy_subtree(dbp, |
---|
1257 | vdp, pgno, l, r, flags, levelp, nrecsp, relenp) |
---|
1258 | DB *dbp; |
---|
1259 | VRFY_DBINFO *vdp; |
---|
1260 | db_pgno_t pgno; |
---|
1261 | void *l, *r; |
---|
1262 | u_int32_t flags, *levelp, *nrecsp, *relenp; |
---|
1263 | { |
---|
1264 | BINTERNAL *li, *ri, *lp, *rp; |
---|
1265 | DB *pgset; |
---|
1266 | PAGE *h; |
---|
1267 | VRFY_CHILDINFO *child; |
---|
1268 | VRFY_PAGEINFO *pip; |
---|
1269 | db_recno_t nrecs, child_nrecs; |
---|
1270 | db_indx_t i; |
---|
1271 | int ret, t_ret, isbad, toplevel, p; |
---|
1272 | u_int32_t level, child_level, stflags, child_relen, relen; |
---|
1273 | DBC *cc; |
---|
1274 | |
---|
1275 | ret = isbad = 0; |
---|
1276 | nrecs = 0; |
---|
1277 | h = NULL; |
---|
1278 | relen = 0; |
---|
1279 | rp = (BINTERNAL *)r; |
---|
1280 | lp = (BINTERNAL *)l; |
---|
1281 | |
---|
1282 | if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) |
---|
1283 | return (ret); |
---|
1284 | |
---|
1285 | cc = NULL; |
---|
1286 | level = pip->bt_level; |
---|
1287 | |
---|
1288 | toplevel = LF_ISSET(ST_TOPLEVEL); |
---|
1289 | LF_CLR(ST_TOPLEVEL); |
---|
1290 | |
---|
1291 | /* |
---|
1292 | * We are recursively descending a btree, starting from the root |
---|
1293 | * and working our way out to the leaves. |
---|
1294 | * |
---|
1295 | * There are four cases we need to deal with: |
---|
1296 | * 1. pgno is a recno leaf page. Any children are overflows. |
---|
1297 | * 2. pgno is a duplicate leaf page. Any children |
---|
1298 | * are overflow pages; traverse them, and then return |
---|
1299 | * level and nrecs. |
---|
1300 | * 3. pgno is an ordinary leaf page. Check whether dups are |
---|
1301 | * allowed, and if so, traverse any off-page dups or |
---|
1302 | * overflows. Then return nrecs and level. |
---|
1303 | * 4. pgno is a recno internal page. Recursively check any |
---|
1304 | * child pages, making sure their levels are one lower |
---|
1305 | * and their nrecs sum to ours. |
---|
1306 | * 5. pgno is a btree internal page. Same as #4, plus we |
---|
1307 | * must verify that for each pair of BINTERNAL entries |
---|
1308 | * N and N+1, the leftmost item on N's child sorts |
---|
1309 | * greater than N, and the rightmost item on N's child |
---|
1310 | * sorts less than N+1. |
---|
1311 | * |
---|
1312 | * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE), |
---|
1313 | * we need to verify the internal sort order is correct if, |
---|
1314 | * due to overflow items, we were not able to do so earlier. |
---|
1315 | */ |
---|
1316 | switch (pip->type) { |
---|
1317 | case P_LRECNO: |
---|
1318 | case P_LDUP: |
---|
1319 | case P_LBTREE: |
---|
1320 | /* |
---|
1321 | * Cases 1, 2 and 3 (overflow pages are common to all three); |
---|
1322 | * traverse child list, looking for overflows. |
---|
1323 | */ |
---|
1324 | if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) |
---|
1325 | goto err; |
---|
1326 | for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0; |
---|
1327 | ret = __db_vrfy_ccnext(cc, &child)) |
---|
1328 | if (child->type == V_OVERFLOW && |
---|
1329 | (ret = __db_vrfy_ovfl_structure(dbp, vdp, |
---|
1330 | child->pgno, child->tlen, |
---|
1331 | flags | ST_OVFL_LEAF)) != 0) { |
---|
1332 | if (ret == DB_VERIFY_BAD) |
---|
1333 | isbad = 1; |
---|
1334 | else |
---|
1335 | goto done; |
---|
1336 | } |
---|
1337 | |
---|
1338 | if ((ret = __db_vrfy_ccclose(cc)) != 0) |
---|
1339 | goto err; |
---|
1340 | cc = NULL; |
---|
1341 | |
---|
1342 | /* Case 1 */ |
---|
1343 | if (pip->type == P_LRECNO) { |
---|
1344 | if (!LF_ISSET(ST_IS_RECNO) && |
---|
1345 | !(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) { |
---|
1346 | isbad = 1; |
---|
1347 | EPRINT((dbp->dbenv, |
---|
1348 | "Recno leaf page %lu in non-recno tree", |
---|
1349 | pgno)); |
---|
1350 | goto done; |
---|
1351 | } |
---|
1352 | goto leaf; |
---|
1353 | } else if (LF_ISSET(ST_IS_RECNO)){ |
---|
1354 | /* |
---|
1355 | * It's a non-recno leaf. Had better not be a recno |
---|
1356 | * subtree. |
---|
1357 | */ |
---|
1358 | isbad = 1; |
---|
1359 | EPRINT((dbp->dbenv, |
---|
1360 | "Non-recno leaf page %lu in recno tree", |
---|
1361 | pgno)); |
---|
1362 | goto done; |
---|
1363 | } |
---|
1364 | |
---|
1365 | /* Case 2--no more work. */ |
---|
1366 | if (pip->type == P_LDUP) |
---|
1367 | goto leaf; |
---|
1368 | |
---|
1369 | /* Case 3 */ |
---|
1370 | |
---|
1371 | /* Check if we have any dups. */ |
---|
1372 | if (F_ISSET(pip, VRFY_HAS_DUPS)) { |
---|
1373 | /* If dups aren't allowed in this btree, trouble. */ |
---|
1374 | if (!LF_ISSET(ST_DUPOK)) { |
---|
1375 | isbad = 1; |
---|
1376 | EPRINT((dbp->dbenv, |
---|
1377 | "Duplicates on page %lu in non-dup btree", |
---|
1378 | pgno)); |
---|
1379 | } else { |
---|
1380 | /* |
---|
1381 | * We correctly have dups. If any are off-page, |
---|
1382 | * traverse those btrees recursively. |
---|
1383 | */ |
---|
1384 | if ((ret = |
---|
1385 | __db_vrfy_childcursor(vdp, &cc)) != 0) |
---|
1386 | goto err; |
---|
1387 | for (ret = __db_vrfy_ccset(cc, pgno, &child); |
---|
1388 | ret == 0; |
---|
1389 | ret = __db_vrfy_ccnext(cc, &child)) { |
---|
1390 | stflags = flags | ST_RECNUM; |
---|
1391 | /* Skip any overflow entries. */ |
---|
1392 | if (child->type == V_DUPLICATE) { |
---|
1393 | if ((ret = __db_vrfy_duptype( |
---|
1394 | dbp, vdp, child->pgno, |
---|
1395 | stflags)) != 0) { |
---|
1396 | isbad = 1; |
---|
1397 | /* Next child. */ |
---|
1398 | continue; |
---|
1399 | } |
---|
1400 | if ((ret = __bam_vrfy_subtree( |
---|
1401 | dbp, vdp, child->pgno, NULL, |
---|
1402 | NULL, stflags, NULL, NULL, |
---|
1403 | NULL)) != 0) { |
---|
1404 | if (ret != |
---|
1405 | DB_VERIFY_BAD) |
---|
1406 | goto err; |
---|
1407 | else |
---|
1408 | isbad = 1; |
---|
1409 | } |
---|
1410 | } |
---|
1411 | } |
---|
1412 | |
---|
1413 | if ((ret = __db_vrfy_ccclose(cc)) != 0) |
---|
1414 | goto err; |
---|
1415 | cc = NULL; |
---|
1416 | |
---|
1417 | /* |
---|
1418 | * If VRFY_DUPS_UNSORTED is set, |
---|
1419 | * ST_DUPSORT had better not be. |
---|
1420 | */ |
---|
1421 | if (F_ISSET(pip, VRFY_DUPS_UNSORTED) && |
---|
1422 | LF_ISSET(ST_DUPSORT)) { |
---|
1423 | EPRINT((dbp->dbenv, |
---|
1424 | "Unsorted duplicate set at page %lu in sorted-dup database", |
---|
1425 | pgno)); |
---|
1426 | isbad = 1; |
---|
1427 | } |
---|
1428 | } |
---|
1429 | } |
---|
1430 | goto leaf; |
---|
1431 | break; |
---|
1432 | case P_IBTREE: |
---|
1433 | case P_IRECNO: |
---|
1434 | /* We handle these below. */ |
---|
1435 | break; |
---|
1436 | default: |
---|
1437 | /* |
---|
1438 | * If a P_IBTREE or P_IRECNO contains a reference to an |
---|
1439 | * invalid page, we'll wind up here; handle it gracefully. |
---|
1440 | * Note that the code at the "done" label assumes that the |
---|
1441 | * current page is a btree/recno one of some sort; this |
---|
1442 | * is not the case here, so we goto err. |
---|
1443 | */ |
---|
1444 | EPRINT((dbp->dbenv, |
---|
1445 | "Page %lu is of inappropriate type %lu", pgno, pip->type)); |
---|
1446 | ret = DB_VERIFY_BAD; |
---|
1447 | goto err; |
---|
1448 | /* NOTREACHED */ |
---|
1449 | } |
---|
1450 | |
---|
1451 | /* |
---|
1452 | * Cases 4 & 5: This is a btree or recno internal page. For each child, |
---|
1453 | * recurse, keeping a running count of nrecs and making sure the level |
---|
1454 | * is always reasonable. |
---|
1455 | */ |
---|
1456 | if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) |
---|
1457 | goto err; |
---|
1458 | for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0; |
---|
1459 | ret = __db_vrfy_ccnext(cc, &child)) |
---|
1460 | if (child->type == V_RECNO) { |
---|
1461 | if (pip->type != P_IRECNO) { |
---|
1462 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_subtree", |
---|
1463 | pgno, pip->type); |
---|
1464 | DB_ASSERT(0); |
---|
1465 | ret = EINVAL; |
---|
1466 | goto err; |
---|
1467 | } |
---|
1468 | if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno, |
---|
1469 | NULL, NULL, flags, &child_level, &child_nrecs, |
---|
1470 | &child_relen)) != 0) { |
---|
1471 | if (ret != DB_VERIFY_BAD) |
---|
1472 | goto done; |
---|
1473 | else |
---|
1474 | isbad = 1; |
---|
1475 | } |
---|
1476 | |
---|
1477 | if (LF_ISSET(ST_RELEN)) { |
---|
1478 | if (relen == 0) |
---|
1479 | relen = child_relen; |
---|
1480 | /* |
---|
1481 | * child_relen may be zero if the child subtree |
---|
1482 | * is empty. |
---|
1483 | */ |
---|
1484 | else if (child_relen > 0 && |
---|
1485 | relen != child_relen) { |
---|
1486 | isbad = 1; |
---|
1487 | EPRINT((dbp->dbenv, |
---|
1488 | "Recno page %lu returned bad re_len", |
---|
1489 | child->pgno)); |
---|
1490 | } |
---|
1491 | if (relenp) |
---|
1492 | *relenp = relen; |
---|
1493 | } |
---|
1494 | if (LF_ISSET(ST_RECNUM)) |
---|
1495 | nrecs += child_nrecs; |
---|
1496 | if (level != child_level + 1) { |
---|
1497 | isbad = 1; |
---|
1498 | EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu", |
---|
1499 | "Recno level incorrect on page ", |
---|
1500 | child->pgno, ": got ", child_level, |
---|
1501 | ", expected ", level - 1)); |
---|
1502 | } |
---|
1503 | } else if (child->type == V_OVERFLOW && |
---|
1504 | (ret = __db_vrfy_ovfl_structure(dbp, vdp, |
---|
1505 | child->pgno, child->tlen, flags)) != 0) { |
---|
1506 | if (ret == DB_VERIFY_BAD) |
---|
1507 | isbad = 1; |
---|
1508 | else |
---|
1509 | goto done; |
---|
1510 | } |
---|
1511 | |
---|
1512 | if ((ret = __db_vrfy_ccclose(cc)) != 0) |
---|
1513 | goto err; |
---|
1514 | cc = NULL; |
---|
1515 | |
---|
1516 | /* We're done with case 4. */ |
---|
1517 | if (pip->type == P_IRECNO) |
---|
1518 | goto done; |
---|
1519 | |
---|
1520 | /* |
---|
1521 | * Case 5. Btree internal pages. |
---|
1522 | * As described above, we need to iterate through all the |
---|
1523 | * items on the page and make sure that our children sort appropriately |
---|
1524 | * with respect to them. |
---|
1525 | * |
---|
1526 | * For each entry, li will be the "left-hand" key for the entry |
---|
1527 | * itself, which must sort lower than all entries on its child; |
---|
1528 | * ri will be the key to its right, which must sort greater. |
---|
1529 | */ |
---|
1530 | if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) |
---|
1531 | goto err; |
---|
1532 | for (i = 0; i < pip->entries; i += O_INDX) { |
---|
1533 | li = GET_BINTERNAL(h, i); |
---|
1534 | ri = (i + O_INDX < pip->entries) ? |
---|
1535 | GET_BINTERNAL(h, i + O_INDX) : NULL; |
---|
1536 | |
---|
1537 | /* |
---|
1538 | * The leftmost key is forcibly sorted less than all entries, |
---|
1539 | * so don't bother passing it. |
---|
1540 | */ |
---|
1541 | if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno, |
---|
1542 | i == 0 ? NULL : li, ri, flags, &child_level, |
---|
1543 | &child_nrecs, NULL)) != 0) { |
---|
1544 | if (ret != DB_VERIFY_BAD) |
---|
1545 | goto done; |
---|
1546 | else |
---|
1547 | isbad = 1; |
---|
1548 | } |
---|
1549 | |
---|
1550 | if (LF_ISSET(ST_RECNUM)) { |
---|
1551 | /* |
---|
1552 | * Keep a running tally on the actual record count so |
---|
1553 | * we can return it to our parent (if we have one) or |
---|
1554 | * compare it to the NRECS field if we're a root page. |
---|
1555 | */ |
---|
1556 | nrecs += child_nrecs; |
---|
1557 | |
---|
1558 | /* |
---|
1559 | * Make sure the actual record count of the child |
---|
1560 | * is equal to the value in the BINTERNAL structure. |
---|
1561 | */ |
---|
1562 | if (li->nrecs != child_nrecs) { |
---|
1563 | isbad = 1; |
---|
1564 | EPRINT((dbp->dbenv, |
---|
1565 | "Item %lu page %lu has incorrect record count of %lu, should be %lu", |
---|
1566 | i, pgno, li->nrecs, child_nrecs)); |
---|
1567 | } |
---|
1568 | } |
---|
1569 | |
---|
1570 | if (level != child_level + 1) { |
---|
1571 | isbad = 1; |
---|
1572 | EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu", |
---|
1573 | "Btree level incorrect on page ", li->pgno, |
---|
1574 | ": got ", child_level, ", expected ", level - 1)); |
---|
1575 | } |
---|
1576 | } |
---|
1577 | |
---|
1578 | if (0) { |
---|
1579 | leaf: level = LEAFLEVEL; |
---|
1580 | if (LF_ISSET(ST_RECNUM)) |
---|
1581 | nrecs = pip->rec_cnt; |
---|
1582 | |
---|
1583 | /* XXX |
---|
1584 | * We should verify that the record count on a leaf page |
---|
1585 | * is the sum of the number of keys and the number of |
---|
1586 | * records in its off-page dups. This requires looking |
---|
1587 | * at the page again, however, and it may all be changing |
---|
1588 | * soon, so for now we don't bother. |
---|
1589 | */ |
---|
1590 | |
---|
1591 | if (LF_ISSET(ST_RELEN) && relenp) |
---|
1592 | *relenp = pip->re_len; |
---|
1593 | } |
---|
1594 | done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) { |
---|
1595 | /* |
---|
1596 | * During the page-by-page pass, item order verification was |
---|
1597 | * not finished due to the presence of overflow items. If |
---|
1598 | * isbad == 0, though, it's now safe to do so, as we've |
---|
1599 | * traversed any child overflow pages. Do it. |
---|
1600 | */ |
---|
1601 | if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) |
---|
1602 | goto err; |
---|
1603 | if ((ret = __bam_vrfy_itemorder(dbp, |
---|
1604 | vdp, h, pgno, 0, 1, 0, flags)) != 0) |
---|
1605 | goto err; |
---|
1606 | F_CLR(pip, VRFY_INCOMPLETE); |
---|
1607 | } |
---|
1608 | |
---|
1609 | /* |
---|
1610 | * Our parent has sent us BINTERNAL pointers to parent records |
---|
1611 | * so that we can verify our place with respect to them. If it's |
---|
1612 | * appropriate--we have a default sort function--verify this. |
---|
1613 | */ |
---|
1614 | if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) { |
---|
1615 | if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) |
---|
1616 | goto err; |
---|
1617 | if ((ret = |
---|
1618 | __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, flags)) != 0) { |
---|
1619 | if (ret == DB_VERIFY_BAD) |
---|
1620 | isbad = 1; |
---|
1621 | else |
---|
1622 | goto err; |
---|
1623 | } |
---|
1624 | } |
---|
1625 | |
---|
1626 | /* |
---|
1627 | * This is guaranteed to succeed for leaf pages, but no harm done. |
---|
1628 | * |
---|
1629 | * Internal pages below the top level do not store their own |
---|
1630 | * record numbers, so we skip them. |
---|
1631 | */ |
---|
1632 | if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) { |
---|
1633 | isbad = 1; |
---|
1634 | EPRINT((dbp->dbenv, |
---|
1635 | "Bad record count on page %lu: got %lu, expected %lu", |
---|
1636 | pgno, nrecs, pip->rec_cnt)); |
---|
1637 | } |
---|
1638 | |
---|
1639 | if (levelp) |
---|
1640 | *levelp = level; |
---|
1641 | if (nrecsp) |
---|
1642 | *nrecsp = nrecs; |
---|
1643 | |
---|
1644 | pgset = vdp->pgset; |
---|
1645 | if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0) |
---|
1646 | goto err; |
---|
1647 | if (p != 0) { |
---|
1648 | isbad = 1; |
---|
1649 | EPRINT((dbp->dbenv, "Page %lu linked twice", pgno)); |
---|
1650 | } else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0) |
---|
1651 | goto err; |
---|
1652 | |
---|
1653 | err: if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0) |
---|
1654 | ret = t_ret; |
---|
1655 | if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) |
---|
1656 | ret = t_ret; |
---|
1657 | if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) |
---|
1658 | ret = t_ret; |
---|
1659 | return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); |
---|
1660 | } |
---|
1661 | |
---|
1662 | /* |
---|
1663 | * __bam_vrfy_treeorder -- |
---|
1664 | * Verify that the lowest key on a page sorts greater than the |
---|
1665 | * BINTERNAL which points to it (lp), and the highest key |
---|
1666 | * sorts less than the BINTERNAL above that (rp). |
---|
1667 | * |
---|
1668 | * If lp is NULL, this means that it was the leftmost key on the |
---|
1669 | * parent, which (regardless of sort function) sorts less than |
---|
1670 | * all keys. No need to check it. |
---|
1671 | * |
---|
1672 | * If rp is NULL, lp was the highest key on the parent, so there's |
---|
1673 | * no higher key we must sort less than. |
---|
1674 | */ |
---|
1675 | static int |
---|
1676 | __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, flags) |
---|
1677 | DB *dbp; |
---|
1678 | db_pgno_t pgno; |
---|
1679 | PAGE *h; |
---|
1680 | BINTERNAL *lp, *rp; |
---|
1681 | u_int32_t flags; |
---|
1682 | { |
---|
1683 | BOVERFLOW *bo; |
---|
1684 | BTREE *t; |
---|
1685 | DBT dbt; |
---|
1686 | db_indx_t last; |
---|
1687 | int (*func)__P((const DBT *, const DBT *)); |
---|
1688 | int ret, cmp; |
---|
1689 | |
---|
1690 | memset(&dbt, 0, sizeof(DBT)); |
---|
1691 | F_SET(&dbt, DB_DBT_MALLOC); |
---|
1692 | t = dbp->bt_internal; |
---|
1693 | func = (t->bt_compare != NULL) ? t->bt_compare : __bam_defcmp; |
---|
1694 | ret = 0; |
---|
1695 | |
---|
1696 | switch (TYPE(h)) { |
---|
1697 | case P_IBTREE: |
---|
1698 | case P_LDUP: |
---|
1699 | last = NUM_ENT(h) - O_INDX; |
---|
1700 | break; |
---|
1701 | case P_LBTREE: |
---|
1702 | last = NUM_ENT(h) - P_INDX; |
---|
1703 | break; |
---|
1704 | default: |
---|
1705 | TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_treeorder", pgno, TYPE(h)); |
---|
1706 | DB_ASSERT(0); |
---|
1707 | return (EINVAL); |
---|
1708 | } |
---|
1709 | |
---|
1710 | /* |
---|
1711 | * The key on page h, the child page, is more likely to be |
---|
1712 | * an overflow page, so we pass its offset, rather than lp/rp's, |
---|
1713 | * into __bam_cmp. This will take advantage of __db_moff. |
---|
1714 | */ |
---|
1715 | |
---|
1716 | /* |
---|
1717 | * Skip first-item check if we're an internal page--the first |
---|
1718 | * entry on an internal page is treated specially by __bam_cmp, |
---|
1719 | * so what's on the page shouldn't matter. (Plus, since we're passing |
---|
1720 | * our page and item 0 as to __bam_cmp, we'll sort before our |
---|
1721 | * parent and falsely report a failure.) |
---|
1722 | */ |
---|
1723 | if (lp != NULL && TYPE(h) != P_IBTREE) { |
---|
1724 | if (lp->type == B_KEYDATA) { |
---|
1725 | dbt.data = lp->data; |
---|
1726 | dbt.size = lp->len; |
---|
1727 | } else if (lp->type == B_OVERFLOW) { |
---|
1728 | bo = (BOVERFLOW *)lp->data; |
---|
1729 | if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno, |
---|
1730 | NULL, NULL)) != 0) |
---|
1731 | return (ret); |
---|
1732 | } else { |
---|
1733 | DB_ASSERT(0); |
---|
1734 | EPRINT((dbp->dbenv, "Unknown type for internal record")); |
---|
1735 | return (EINVAL); |
---|
1736 | } |
---|
1737 | |
---|
1738 | /* On error, fall through, free if neeeded, and return. */ |
---|
1739 | if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) { |
---|
1740 | if (cmp > 0) { |
---|
1741 | EPRINT((dbp->dbenv, |
---|
1742 | "First item on page %lu sorted greater than parent entry", |
---|
1743 | PGNO(h))); |
---|
1744 | ret = DB_VERIFY_BAD; |
---|
1745 | } |
---|
1746 | } else |
---|
1747 | EPRINT((dbp->dbenv, |
---|
1748 | "First item on page %lu had comparison error", |
---|
1749 | PGNO(h))); |
---|
1750 | |
---|
1751 | if (dbt.data != lp->data) |
---|
1752 | __os_free(dbt.data, 0); |
---|
1753 | if (ret != 0) |
---|
1754 | return (ret); |
---|
1755 | } |
---|
1756 | |
---|
1757 | if (rp != NULL) { |
---|
1758 | if (rp->type == B_KEYDATA) { |
---|
1759 | dbt.data = rp->data; |
---|
1760 | dbt.size = rp->len; |
---|
1761 | } else if (rp->type == B_OVERFLOW) { |
---|
1762 | bo = (BOVERFLOW *)rp->data; |
---|
1763 | if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno, |
---|
1764 | NULL, NULL)) != 0) |
---|
1765 | return (ret); |
---|
1766 | } else { |
---|
1767 | DB_ASSERT(0); |
---|
1768 | EPRINT((dbp->dbenv, "Unknown type for internal record")); |
---|
1769 | return (EINVAL); |
---|
1770 | } |
---|
1771 | |
---|
1772 | /* On error, fall through, free if neeeded, and return. */ |
---|
1773 | if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) { |
---|
1774 | if (cmp < 0) { |
---|
1775 | EPRINT((dbp->dbenv, |
---|
1776 | "Last item on page %lu sorted greater than parent entry", |
---|
1777 | PGNO(h))); |
---|
1778 | ret = DB_VERIFY_BAD; |
---|
1779 | } |
---|
1780 | } else |
---|
1781 | EPRINT((dbp->dbenv, |
---|
1782 | "Last item on page %lu had comparison error", |
---|
1783 | PGNO(h))); |
---|
1784 | |
---|
1785 | if (dbt.data != rp->data) |
---|
1786 | __os_free(dbt.data, 0); |
---|
1787 | } |
---|
1788 | |
---|
1789 | return (ret); |
---|
1790 | } |
---|
1791 | |
---|
1792 | /* |
---|
1793 | * __bam_salvage -- |
---|
1794 | * Safely dump out anything that looks like a key on an alleged |
---|
1795 | * btree leaf page. |
---|
1796 | * |
---|
1797 | * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, |
---|
1798 | * PUBLIC: PAGE *, void *, int (*)(void *, const void *), DBT *, |
---|
1799 | * PUBLIC: u_int32_t)); |
---|
1800 | */ |
---|
1801 | int |
---|
1802 | __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags) |
---|
1803 | DB *dbp; |
---|
1804 | VRFY_DBINFO *vdp; |
---|
1805 | db_pgno_t pgno; |
---|
1806 | u_int32_t pgtype; |
---|
1807 | PAGE *h; |
---|
1808 | void *handle; |
---|
1809 | int (*callback) __P((void *, const void *)); |
---|
1810 | DBT *key; |
---|
1811 | u_int32_t flags; |
---|
1812 | { |
---|
1813 | DBT dbt, unkdbt; |
---|
1814 | BKEYDATA *bk; |
---|
1815 | BOVERFLOW *bo; |
---|
1816 | db_indx_t i, beg, end; |
---|
1817 | u_int32_t himark; |
---|
1818 | u_int8_t *pgmap; |
---|
1819 | void *ovflbuf; |
---|
1820 | int t_ret, ret, err_ret; |
---|
1821 | |
---|
1822 | /* Shut up lint. */ |
---|
1823 | COMPQUIET(end, 0); |
---|
1824 | |
---|
1825 | ovflbuf = pgmap = NULL; |
---|
1826 | err_ret = ret = 0; |
---|
1827 | |
---|
1828 | memset(&dbt, 0, sizeof(DBT)); |
---|
1829 | dbt.flags = DB_DBT_REALLOC; |
---|
1830 | |
---|
1831 | memset(&unkdbt, 0, sizeof(DBT)); |
---|
1832 | unkdbt.size = strlen("UNKNOWN") + 1; |
---|
1833 | unkdbt.data = "UNKNOWN"; |
---|
1834 | |
---|
1835 | /* |
---|
1836 | * Allocate a buffer for overflow items. Start at one page; |
---|
1837 | * __db_safe_goff will realloc as needed. |
---|
1838 | */ |
---|
1839 | if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &ovflbuf)) != 0) |
---|
1840 | return (ret); |
---|
1841 | |
---|
1842 | if (LF_ISSET(DB_AGGRESSIVE)) { |
---|
1843 | if ((ret = |
---|
1844 | __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pgmap)) != 0) |
---|
1845 | goto err; |
---|
1846 | memset(pgmap, 0, dbp->pgsize); |
---|
1847 | } |
---|
1848 | |
---|
1849 | /* |
---|
1850 | * Loop through the inp array, spitting out key/data pairs. |
---|
1851 | * |
---|
1852 | * If we're salvaging normally, loop from 0 through NUM_ENT(h). |
---|
1853 | * If we're being aggressive, loop until we hit the end of the page-- |
---|
1854 | * NUM_ENT() may be bogus. |
---|
1855 | */ |
---|
1856 | i = 0; |
---|
1857 | himark = dbp->pgsize; |
---|
1858 | for (;;) { |
---|
1859 | /* If we're not aggressive, break when we hit NUM_ENT(h). */ |
---|
1860 | if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) |
---|
1861 | break; |
---|
1862 | |
---|
1863 | /* Verify the current item. */ |
---|
1864 | ret = __db_vrfy_inpitem(dbp, |
---|
1865 | h, pgno, i, 1, flags, &himark, NULL); |
---|
1866 | /* If this returned a fatality, it's time to break. */ |
---|
1867 | if (ret == DB_VERIFY_FATAL) |
---|
1868 | break; |
---|
1869 | /* |
---|
1870 | * If this returned 0, it's safe to print or (carefully) |
---|
1871 | * try to fetch. |
---|
1872 | */ |
---|
1873 | if (ret == 0) { |
---|
1874 | /* |
---|
1875 | * We're going to go try to print the next item. If |
---|
1876 | * key is non-NULL, we're a dup page, so we've got to |
---|
1877 | * print the key first, unless SA_SKIPFIRSTKEY is set |
---|
1878 | * and we're on the first entry. |
---|
1879 | */ |
---|
1880 | if (key != NULL && |
---|
1881 | (i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY))) |
---|
1882 | if ((ret = __db_prdbt(key, |
---|
1883 | 0, " ", handle, callback, 0, NULL)) != 0) |
---|
1884 | err_ret = ret; |
---|
1885 | |
---|
1886 | bk = GET_BKEYDATA(h, i); |
---|
1887 | beg = h->inp[i]; |
---|
1888 | switch (bk->type) { |
---|
1889 | case B_DUPLICATE: |
---|
1890 | end = beg + BOVERFLOW_SIZE - 1; |
---|
1891 | /* |
---|
1892 | * If we're not on a normal btree leaf page, |
---|
1893 | * there shouldn't be off-page |
---|
1894 | * dup sets. Something's confused; just |
---|
1895 | * drop it, and the code to pick up unlinked |
---|
1896 | * offpage dup sets will print it out |
---|
1897 | * with key "UNKNOWN" later. |
---|
1898 | */ |
---|
1899 | if (pgtype != P_LBTREE) |
---|
1900 | break; |
---|
1901 | |
---|
1902 | bo = (BOVERFLOW *)bk; |
---|
1903 | |
---|
1904 | /* |
---|
1905 | * If the page number is unreasonable, or |
---|
1906 | * if this is supposed to be a key item, |
---|
1907 | * just spit out "UNKNOWN"--the best we |
---|
1908 | * can do is run into the data items in the |
---|
1909 | * unlinked offpage dup pass. |
---|
1910 | */ |
---|
1911 | if (!IS_VALID_PGNO(bo->pgno) || |
---|
1912 | (i % P_INDX == 0)) { |
---|
1913 | /* Not much to do on failure. */ |
---|
1914 | if ((ret = __db_prdbt(&unkdbt, 0, " ", |
---|
1915 | handle, callback, 0, NULL)) != 0) |
---|
1916 | err_ret = ret; |
---|
1917 | break; |
---|
1918 | } |
---|
1919 | |
---|
1920 | if ((ret = __db_salvage_duptree(dbp, |
---|
1921 | vdp, bo->pgno, &dbt, handle, callback, |
---|
1922 | flags | SA_SKIPFIRSTKEY)) != 0) |
---|
1923 | err_ret = ret; |
---|
1924 | |
---|
1925 | break; |
---|
1926 | default: |
---|
1927 | /* |
---|
1928 | * If we're being aggressive, fall through |
---|
1929 | * and treat as a B_KEYDATA. Seems unlikely |
---|
1930 | * that the length would be okay and the type |
---|
1931 | * bogus, but we can never be sure. |
---|
1932 | */ |
---|
1933 | if (!LF_ISSET(DB_AGGRESSIVE)) |
---|
1934 | break; |
---|
1935 | /* FALLTHROUGH */ |
---|
1936 | case B_KEYDATA: |
---|
1937 | end = ALIGN(beg + bk->len, 4) - 1; |
---|
1938 | dbt.data = bk->data; |
---|
1939 | dbt.size = bk->len; |
---|
1940 | if ((ret = __db_prdbt(&dbt, |
---|
1941 | 0, " ", handle, callback, 0, NULL)) != 0) |
---|
1942 | err_ret = ret; |
---|
1943 | break; |
---|
1944 | case B_OVERFLOW: |
---|
1945 | end = beg + BOVERFLOW_SIZE - 1; |
---|
1946 | bo = (BOVERFLOW *)bk; |
---|
1947 | if ((ret = __db_safe_goff(dbp, vdp, |
---|
1948 | bo->pgno, &dbt, &ovflbuf, flags)) != 0) { |
---|
1949 | err_ret = ret; |
---|
1950 | /* We care about err_ret more. */ |
---|
1951 | (void)__db_prdbt(&unkdbt, 0, " ", |
---|
1952 | handle, callback, 0, NULL); |
---|
1953 | break; |
---|
1954 | } |
---|
1955 | if ((ret = __db_prdbt(&dbt, |
---|
1956 | 0, " ", handle, callback, 0, NULL)) != 0) |
---|
1957 | err_ret = ret; |
---|
1958 | break; |
---|
1959 | } |
---|
1960 | |
---|
1961 | /* |
---|
1962 | * If we're being aggressive, mark the beginning |
---|
1963 | * and end of the item; we'll come back and print |
---|
1964 | * whatever "junk" is in the gaps in case we had |
---|
1965 | * any bogus inp elements and thereby missed stuff. |
---|
1966 | */ |
---|
1967 | if (LF_ISSET(DB_AGGRESSIVE)) { |
---|
1968 | pgmap[beg] = ITEM_BEGIN; |
---|
1969 | pgmap[end] = ITEM_END; |
---|
1970 | } |
---|
1971 | } |
---|
1972 | i += O_INDX; |
---|
1973 | } |
---|
1974 | |
---|
1975 | /* |
---|
1976 | * If i is odd and this is a btree leaf, we've printed out a key but not |
---|
1977 | * a datum; fix this imbalance by printing an "UNKNOWN". |
---|
1978 | */ |
---|
1979 | if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret = |
---|
1980 | __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, NULL)) != 0)) |
---|
1981 | err_ret = ret; |
---|
1982 | |
---|
1983 | err: if (pgmap != NULL) |
---|
1984 | __os_free(pgmap, 0); |
---|
1985 | __os_free(ovflbuf, 0); |
---|
1986 | |
---|
1987 | /* Mark this page as done. */ |
---|
1988 | if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) |
---|
1989 | return (t_ret); |
---|
1990 | |
---|
1991 | return ((err_ret != 0) ? err_ret : ret); |
---|
1992 | } |
---|
1993 | |
---|
1994 | /* |
---|
1995 | * __bam_salvage_walkdupint -- |
---|
1996 | * Walk a known-good btree or recno internal page which is part of |
---|
1997 | * a dup tree, calling __db_salvage_duptree on each child page. |
---|
1998 | * |
---|
1999 | * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, |
---|
2000 | * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t)); |
---|
2001 | */ |
---|
2002 | int |
---|
2003 | __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags) |
---|
2004 | DB *dbp; |
---|
2005 | VRFY_DBINFO *vdp; |
---|
2006 | PAGE *h; |
---|
2007 | DBT *key; |
---|
2008 | void *handle; |
---|
2009 | int (*callback) __P((void *, const void *)); |
---|
2010 | u_int32_t flags; |
---|
2011 | { |
---|
2012 | RINTERNAL *ri; |
---|
2013 | BINTERNAL *bi; |
---|
2014 | int ret, t_ret; |
---|
2015 | db_indx_t i; |
---|
2016 | |
---|
2017 | ret = 0; |
---|
2018 | for (i = 0; i < NUM_ENT(h); i++) { |
---|
2019 | switch (TYPE(h)) { |
---|
2020 | case P_IBTREE: |
---|
2021 | bi = GET_BINTERNAL(h, i); |
---|
2022 | if ((t_ret = __db_salvage_duptree(dbp, |
---|
2023 | vdp, bi->pgno, key, handle, callback, flags)) != 0) |
---|
2024 | ret = t_ret; |
---|
2025 | case P_IRECNO: |
---|
2026 | ri = GET_RINTERNAL(h, i); |
---|
2027 | if ((t_ret = __db_salvage_duptree(dbp, |
---|
2028 | vdp, ri->pgno, key, handle, callback, flags)) != 0) |
---|
2029 | ret = t_ret; |
---|
2030 | break; |
---|
2031 | default: |
---|
2032 | __db_err(dbp->dbenv, |
---|
2033 | "__bam_salvage_walkdupint called on non-int. page"); |
---|
2034 | DB_ASSERT(0); |
---|
2035 | return (EINVAL); |
---|
2036 | } |
---|
2037 | /* Pass SA_SKIPFIRSTKEY, if set, on to the 0th child only. */ |
---|
2038 | flags &= ~LF_ISSET(SA_SKIPFIRSTKEY); |
---|
2039 | } |
---|
2040 | |
---|
2041 | return (ret); |
---|
2042 | } |
---|
2043 | |
---|
2044 | /* |
---|
2045 | * __bam_meta2pgset -- |
---|
2046 | * Given a known-good meta page, return in pgsetp a 0-terminated list of |
---|
2047 | * db_pgno_t's corresponding to the pages in the btree. |
---|
2048 | * |
---|
2049 | * We do this by a somewhat sleazy method, to avoid having to traverse the |
---|
2050 | * btree structure neatly: we walk down the left side to the very |
---|
2051 | * first leaf page, then we mark all the pages in the chain of |
---|
2052 | * NEXT_PGNOs (being wary of cycles and invalid ones), then we |
---|
2053 | * consolidate our scratch array into a nice list, and return. This |
---|
2054 | * avoids the memory management hassles of recursion and the |
---|
2055 | * trouble of walking internal pages--they just don't matter, except |
---|
2056 | * for the left branch. |
---|
2057 | * |
---|
2058 | * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, |
---|
2059 | * PUBLIC: u_int32_t, DB *)); |
---|
2060 | */ |
---|
2061 | int |
---|
2062 | __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset) |
---|
2063 | DB *dbp; |
---|
2064 | VRFY_DBINFO *vdp; |
---|
2065 | BTMETA *btmeta; |
---|
2066 | u_int32_t flags; |
---|
2067 | DB *pgset; |
---|
2068 | { |
---|
2069 | BINTERNAL *bi; |
---|
2070 | PAGE *h; |
---|
2071 | RINTERNAL *ri; |
---|
2072 | db_pgno_t current, p; |
---|
2073 | int err_ret, ret; |
---|
2074 | |
---|
2075 | h = NULL; |
---|
2076 | ret = err_ret = 0; |
---|
2077 | DB_ASSERT(pgset != NULL); |
---|
2078 | for (current = btmeta->root;;) { |
---|
2079 | if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) { |
---|
2080 | err_ret = DB_VERIFY_BAD; |
---|
2081 | goto err; |
---|
2082 | } |
---|
2083 | if ((ret = memp_fget(dbp->mpf, ¤t, 0, &h)) != 0) { |
---|
2084 | err_ret = ret; |
---|
2085 | goto err; |
---|
2086 | } |
---|
2087 | |
---|
2088 | switch (TYPE(h)) { |
---|
2089 | case P_IBTREE: |
---|
2090 | case P_IRECNO: |
---|
2091 | if ((ret = __bam_vrfy(dbp, |
---|
2092 | vdp, h, current, flags | DB_NOORDERCHK)) != 0) { |
---|
2093 | err_ret = ret; |
---|
2094 | goto err; |
---|
2095 | } |
---|
2096 | if (TYPE(h) == P_IBTREE) { |
---|
2097 | bi = GET_BINTERNAL(h, 0); |
---|
2098 | current = bi->pgno; |
---|
2099 | } else { /* P_IRECNO */ |
---|
2100 | ri = GET_RINTERNAL(h, 0); |
---|
2101 | current = ri->pgno; |
---|
2102 | } |
---|
2103 | break; |
---|
2104 | case P_LBTREE: |
---|
2105 | case P_LRECNO: |
---|
2106 | goto traverse; |
---|
2107 | /* NOTREACHED */ |
---|
2108 | default: |
---|
2109 | err_ret = DB_VERIFY_BAD; |
---|
2110 | goto err; |
---|
2111 | } |
---|
2112 | |
---|
2113 | if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) |
---|
2114 | err_ret = ret; |
---|
2115 | h = NULL; |
---|
2116 | } |
---|
2117 | |
---|
2118 | /* |
---|
2119 | * At this point, current is the pgno of leaf page h, the 0th in the |
---|
2120 | * tree we're concerned with. |
---|
2121 | */ |
---|
2122 | traverse: |
---|
2123 | while (IS_VALID_PGNO(current) && current != PGNO_INVALID) { |
---|
2124 | if (h == NULL && |
---|
2125 | (ret = memp_fget(dbp->mpf, ¤t, 0, &h) != 0)) { |
---|
2126 | err_ret = ret; |
---|
2127 | break; |
---|
2128 | } |
---|
2129 | |
---|
2130 | if ((ret = __db_vrfy_pgset_get(pgset, current, (int *)&p)) != 0) |
---|
2131 | goto err; |
---|
2132 | |
---|
2133 | if (p != 0) { |
---|
2134 | /* |
---|
2135 | * We've found a cycle. Return success anyway-- |
---|
2136 | * our caller may as well use however much of |
---|
2137 | * the pgset we've come up with. |
---|
2138 | */ |
---|
2139 | break; |
---|
2140 | } |
---|
2141 | if ((ret = __db_vrfy_pgset_inc(pgset, current)) != 0) |
---|
2142 | goto err; |
---|
2143 | |
---|
2144 | current = NEXT_PGNO(h); |
---|
2145 | if ((ret = memp_fput(dbp->mpf, h, 0)) != 0) |
---|
2146 | err_ret = ret; |
---|
2147 | h = NULL; |
---|
2148 | } |
---|
2149 | |
---|
2150 | err: if (h != NULL) |
---|
2151 | (void)memp_fput(dbp->mpf, h, 0); |
---|
2152 | |
---|
2153 | return (ret == 0 ? err_ret : ret); |
---|
2154 | } |
---|
2155 | |
---|
2156 | /* |
---|
2157 | * __bam_safe_getdata -- |
---|
2158 | * |
---|
2159 | * Utility function for __bam_vrfy_itemorder. Safely gets the datum at |
---|
2160 | * index i, page h, and sticks it in DBT dbt. If ovflok is 1 and i's an |
---|
2161 | * overflow item, we do a safe_goff to get the item and signal that we need |
---|
2162 | * to free dbt->data; if ovflok is 0, we leaves the DBT zeroed. |
---|
2163 | */ |
---|
2164 | static int |
---|
2165 | __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp) |
---|
2166 | DB *dbp; |
---|
2167 | PAGE *h; |
---|
2168 | u_int32_t i; |
---|
2169 | int ovflok; |
---|
2170 | DBT *dbt; |
---|
2171 | int *freedbtp; |
---|
2172 | { |
---|
2173 | BKEYDATA *bk; |
---|
2174 | BOVERFLOW *bo; |
---|
2175 | |
---|
2176 | memset(dbt, 0, sizeof(DBT)); |
---|
2177 | *freedbtp = 0; |
---|
2178 | |
---|
2179 | bk = GET_BKEYDATA(h, i); |
---|
2180 | if (B_TYPE(bk->type) == B_OVERFLOW) { |
---|
2181 | if (!ovflok) |
---|
2182 | return(0); |
---|
2183 | |
---|
2184 | bo = (BOVERFLOW *)bk; |
---|
2185 | F_SET(dbt, DB_DBT_MALLOC); |
---|
2186 | |
---|
2187 | *freedbtp = 1; |
---|
2188 | return (__db_goff(dbp, dbt, bo->tlen, bo->pgno, NULL, NULL)); |
---|
2189 | /* NOTREACHED */ |
---|
2190 | } else { |
---|
2191 | dbt->data = bk->data; |
---|
2192 | dbt->size = bk->len; |
---|
2193 | } |
---|
2194 | |
---|
2195 | return (0); |
---|
2196 | } |
---|