1 | /*- |
---|
2 | * See the file LICENSE for redistribution information. |
---|
3 | * |
---|
4 | * Copyright (c) 1996-2002 |
---|
5 | * Sleepycat Software. All rights reserved. |
---|
6 | */ |
---|
7 | #include "db_config.h" |
---|
8 | |
---|
9 | #ifndef lint |
---|
10 | static const char revid[] = "$Id: mp_region.c,v 1.1.1.1 2004-12-17 17:27:13 ghudson Exp $"; |
---|
11 | #endif /* not lint */ |
---|
12 | |
---|
13 | #ifndef NO_SYSTEM_INCLUDES |
---|
14 | #include <sys/types.h> |
---|
15 | |
---|
16 | #include <string.h> |
---|
17 | #endif |
---|
18 | |
---|
19 | #include "db_int.h" |
---|
20 | #include "dbinc/db_shash.h" |
---|
21 | #include "dbinc/mp.h" |
---|
22 | |
---|
23 | static int __mpool_init __P((DB_ENV *, DB_MPOOL *, int, int)); |
---|
24 | #ifdef HAVE_MUTEX_SYSTEM_RESOURCES |
---|
25 | static size_t __mpool_region_maint __P((REGINFO *)); |
---|
26 | #endif |
---|
27 | |
---|
28 | /* |
---|
29 | * __memp_open -- |
---|
30 | * Internal version of memp_open: only called from DB_ENV->open. |
---|
31 | * |
---|
32 | * PUBLIC: int __memp_open __P((DB_ENV *)); |
---|
33 | */ |
---|
34 | int |
---|
35 | __memp_open(dbenv) |
---|
36 | DB_ENV *dbenv; |
---|
37 | { |
---|
38 | DB_MPOOL *dbmp; |
---|
39 | MPOOL *mp; |
---|
40 | REGINFO reginfo; |
---|
41 | roff_t reg_size, *regids; |
---|
42 | u_int32_t i; |
---|
43 | int htab_buckets, ret; |
---|
44 | |
---|
45 | /* Figure out how big each cache region is. */ |
---|
46 | reg_size = (dbenv->mp_gbytes / dbenv->mp_ncache) * GIGABYTE; |
---|
47 | reg_size += ((dbenv->mp_gbytes % |
---|
48 | dbenv->mp_ncache) * GIGABYTE) / dbenv->mp_ncache; |
---|
49 | reg_size += dbenv->mp_bytes / dbenv->mp_ncache; |
---|
50 | |
---|
51 | /* |
---|
52 | * Figure out how many hash buckets each region will have. Assume we |
---|
53 | * want to keep the hash chains with under 10 pages on each chain. We |
---|
54 | * don't know the pagesize in advance, and it may differ for different |
---|
55 | * files. Use a pagesize of 1K for the calculation -- we walk these |
---|
56 | * chains a lot, they must be kept short. |
---|
57 | */ |
---|
58 | htab_buckets = __db_tablesize((reg_size / (1 * 1024)) / 10); |
---|
59 | |
---|
60 | /* Create and initialize the DB_MPOOL structure. */ |
---|
61 | if ((ret = __os_calloc(dbenv, 1, sizeof(*dbmp), &dbmp)) != 0) |
---|
62 | return (ret); |
---|
63 | LIST_INIT(&dbmp->dbregq); |
---|
64 | TAILQ_INIT(&dbmp->dbmfq); |
---|
65 | dbmp->dbenv = dbenv; |
---|
66 | |
---|
67 | /* Join/create the first mpool region. */ |
---|
68 | memset(®info, 0, sizeof(REGINFO)); |
---|
69 | reginfo.type = REGION_TYPE_MPOOL; |
---|
70 | reginfo.id = INVALID_REGION_ID; |
---|
71 | reginfo.mode = dbenv->db_mode; |
---|
72 | reginfo.flags = REGION_JOIN_OK; |
---|
73 | if (F_ISSET(dbenv, DB_ENV_CREATE)) |
---|
74 | F_SET(®info, REGION_CREATE_OK); |
---|
75 | if ((ret = __db_r_attach(dbenv, ®info, reg_size)) != 0) |
---|
76 | goto err; |
---|
77 | |
---|
78 | /* |
---|
79 | * If we created the region, initialize it. Create or join any |
---|
80 | * additional regions. |
---|
81 | */ |
---|
82 | if (F_ISSET(®info, REGION_CREATE)) { |
---|
83 | /* |
---|
84 | * We define how many regions there are going to be, allocate |
---|
85 | * the REGINFO structures and create them. Make sure we don't |
---|
86 | * clear the wrong entries on error. |
---|
87 | */ |
---|
88 | dbmp->nreg = dbenv->mp_ncache; |
---|
89 | if ((ret = __os_calloc(dbenv, |
---|
90 | dbmp->nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0) |
---|
91 | goto err; |
---|
92 | /* Make sure we don't clear the wrong entries on error. */ |
---|
93 | for (i = 0; i < dbmp->nreg; ++i) |
---|
94 | dbmp->reginfo[i].id = INVALID_REGION_ID; |
---|
95 | dbmp->reginfo[0] = reginfo; |
---|
96 | |
---|
97 | /* Initialize the first region. */ |
---|
98 | if ((ret = __mpool_init(dbenv, dbmp, 0, htab_buckets)) != 0) |
---|
99 | goto err; |
---|
100 | |
---|
101 | /* |
---|
102 | * Create/initialize remaining regions and copy their IDs into |
---|
103 | * the first region. |
---|
104 | */ |
---|
105 | mp = R_ADDR(dbmp->reginfo, dbmp->reginfo[0].rp->primary); |
---|
106 | regids = R_ADDR(dbmp->reginfo, mp->regids); |
---|
107 | for (i = 1; i < dbmp->nreg; ++i) { |
---|
108 | dbmp->reginfo[i].type = REGION_TYPE_MPOOL; |
---|
109 | dbmp->reginfo[i].id = INVALID_REGION_ID; |
---|
110 | dbmp->reginfo[i].mode = dbenv->db_mode; |
---|
111 | dbmp->reginfo[i].flags = REGION_CREATE_OK; |
---|
112 | if ((ret = __db_r_attach( |
---|
113 | dbenv, &dbmp->reginfo[i], reg_size)) != 0) |
---|
114 | goto err; |
---|
115 | if ((ret = |
---|
116 | __mpool_init(dbenv, dbmp, i, htab_buckets)) != 0) |
---|
117 | goto err; |
---|
118 | R_UNLOCK(dbenv, &dbmp->reginfo[i]); |
---|
119 | |
---|
120 | regids[i] = dbmp->reginfo[i].id; |
---|
121 | } |
---|
122 | |
---|
123 | R_UNLOCK(dbenv, dbmp->reginfo); |
---|
124 | } else { |
---|
125 | /* |
---|
126 | * Determine how many regions there are going to be, allocate |
---|
127 | * the REGINFO structures and fill in local copies of that |
---|
128 | * information. |
---|
129 | */ |
---|
130 | mp = R_ADDR(®info, reginfo.rp->primary); |
---|
131 | dbmp->nreg = mp->nreg; |
---|
132 | if ((ret = __os_calloc(dbenv, |
---|
133 | dbmp->nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0) |
---|
134 | goto err; |
---|
135 | /* Make sure we don't clear the wrong entries on error. */ |
---|
136 | for (i = 0; i < dbmp->nreg; ++i) |
---|
137 | dbmp->reginfo[i].id = INVALID_REGION_ID; |
---|
138 | dbmp->reginfo[0] = reginfo; |
---|
139 | |
---|
140 | /* |
---|
141 | * We have to unlock the primary mpool region before we attempt |
---|
142 | * to join the additional mpool regions. If we don't, we can |
---|
143 | * deadlock. The scenario is that we hold the primary mpool |
---|
144 | * region lock. We then try to attach to an additional mpool |
---|
145 | * region, which requires the acquisition/release of the main |
---|
146 | * region lock (to search the list of regions). If another |
---|
147 | * thread of control already holds the main region lock and is |
---|
148 | * waiting on our primary mpool region lock, we'll deadlock. |
---|
149 | * See [#4696] for more information. |
---|
150 | */ |
---|
151 | R_UNLOCK(dbenv, dbmp->reginfo); |
---|
152 | |
---|
153 | /* Join remaining regions. */ |
---|
154 | regids = R_ADDR(dbmp->reginfo, mp->regids); |
---|
155 | for (i = 1; i < dbmp->nreg; ++i) { |
---|
156 | dbmp->reginfo[i].type = REGION_TYPE_MPOOL; |
---|
157 | dbmp->reginfo[i].id = regids[i]; |
---|
158 | dbmp->reginfo[i].mode = 0; |
---|
159 | dbmp->reginfo[i].flags = REGION_JOIN_OK; |
---|
160 | if ((ret = __db_r_attach( |
---|
161 | dbenv, &dbmp->reginfo[i], 0)) != 0) |
---|
162 | goto err; |
---|
163 | R_UNLOCK(dbenv, &dbmp->reginfo[i]); |
---|
164 | } |
---|
165 | } |
---|
166 | |
---|
167 | /* Set the local addresses for the regions. */ |
---|
168 | for (i = 0; i < dbmp->nreg; ++i) |
---|
169 | dbmp->reginfo[i].primary = |
---|
170 | R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary); |
---|
171 | |
---|
172 | /* If the region is threaded, allocate a mutex to lock the handles. */ |
---|
173 | if (F_ISSET(dbenv, DB_ENV_THREAD) && |
---|
174 | (ret = __db_mutex_setup(dbenv, dbmp->reginfo, &dbmp->mutexp, |
---|
175 | MUTEX_ALLOC | MUTEX_THREAD)) != 0) |
---|
176 | goto err; |
---|
177 | |
---|
178 | dbenv->mp_handle = dbmp; |
---|
179 | return (0); |
---|
180 | |
---|
181 | err: if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) { |
---|
182 | if (F_ISSET(dbmp->reginfo, REGION_CREATE)) |
---|
183 | ret = __db_panic(dbenv, ret); |
---|
184 | |
---|
185 | R_UNLOCK(dbenv, dbmp->reginfo); |
---|
186 | |
---|
187 | for (i = 0; i < dbmp->nreg; ++i) |
---|
188 | if (dbmp->reginfo[i].id != INVALID_REGION_ID) |
---|
189 | (void)__db_r_detach( |
---|
190 | dbenv, &dbmp->reginfo[i], 0); |
---|
191 | __os_free(dbenv, dbmp->reginfo); |
---|
192 | } |
---|
193 | if (dbmp->mutexp != NULL) |
---|
194 | __db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp); |
---|
195 | __os_free(dbenv, dbmp); |
---|
196 | return (ret); |
---|
197 | } |
---|
198 | |
---|
199 | /* |
---|
200 | * __mpool_init -- |
---|
201 | * Initialize a MPOOL structure in shared memory. |
---|
202 | */ |
---|
203 | static int |
---|
204 | __mpool_init(dbenv, dbmp, reginfo_off, htab_buckets) |
---|
205 | DB_ENV *dbenv; |
---|
206 | DB_MPOOL *dbmp; |
---|
207 | int reginfo_off, htab_buckets; |
---|
208 | { |
---|
209 | DB_MPOOL_HASH *htab; |
---|
210 | MPOOL *mp; |
---|
211 | REGINFO *reginfo; |
---|
212 | #ifdef HAVE_MUTEX_SYSTEM_RESOURCES |
---|
213 | size_t maint_size; |
---|
214 | #endif |
---|
215 | int i, ret; |
---|
216 | void *p; |
---|
217 | |
---|
218 | mp = NULL; |
---|
219 | |
---|
220 | reginfo = &dbmp->reginfo[reginfo_off]; |
---|
221 | if ((ret = __db_shalloc(reginfo->addr, |
---|
222 | sizeof(MPOOL), MUTEX_ALIGN, ®info->primary)) != 0) |
---|
223 | goto mem_err; |
---|
224 | reginfo->rp->primary = R_OFFSET(reginfo, reginfo->primary); |
---|
225 | mp = reginfo->primary; |
---|
226 | memset(mp, 0, sizeof(*mp)); |
---|
227 | |
---|
228 | #ifdef HAVE_MUTEX_SYSTEM_RESOURCES |
---|
229 | maint_size = __mpool_region_maint(reginfo); |
---|
230 | /* Allocate room for the maintenance info and initialize it. */ |
---|
231 | if ((ret = __db_shalloc(reginfo->addr, |
---|
232 | sizeof(REGMAINT) + maint_size, 0, &p)) != 0) |
---|
233 | goto mem_err; |
---|
234 | __db_maintinit(reginfo, p, maint_size); |
---|
235 | mp->maint_off = R_OFFSET(reginfo, p); |
---|
236 | #endif |
---|
237 | |
---|
238 | if (reginfo_off == 0) { |
---|
239 | SH_TAILQ_INIT(&mp->mpfq); |
---|
240 | |
---|
241 | ZERO_LSN(mp->lsn); |
---|
242 | |
---|
243 | mp->nreg = dbmp->nreg; |
---|
244 | if ((ret = __db_shalloc(dbmp->reginfo[0].addr, |
---|
245 | dbmp->nreg * sizeof(int), 0, &p)) != 0) |
---|
246 | goto mem_err; |
---|
247 | mp->regids = R_OFFSET(dbmp->reginfo, p); |
---|
248 | } |
---|
249 | |
---|
250 | /* Allocate hash table space and initialize it. */ |
---|
251 | if ((ret = __db_shalloc(reginfo->addr, |
---|
252 | htab_buckets * sizeof(DB_MPOOL_HASH), 0, &htab)) != 0) |
---|
253 | goto mem_err; |
---|
254 | mp->htab = R_OFFSET(reginfo, htab); |
---|
255 | for (i = 0; i < htab_buckets; i++) { |
---|
256 | if ((ret = __db_mutex_setup(dbenv, |
---|
257 | reginfo, &htab[i].hash_mutex, |
---|
258 | MUTEX_NO_RLOCK)) != 0) |
---|
259 | return (ret); |
---|
260 | SH_TAILQ_INIT(&htab[i].hash_bucket); |
---|
261 | htab[i].hash_page_dirty = htab[i].hash_priority = 0; |
---|
262 | } |
---|
263 | mp->htab_buckets = mp->stat.st_hash_buckets = htab_buckets; |
---|
264 | |
---|
265 | /* |
---|
266 | * Only the environment creator knows the total cache size, fill in |
---|
267 | * those statistics now. |
---|
268 | */ |
---|
269 | mp->stat.st_gbytes = dbenv->mp_gbytes; |
---|
270 | mp->stat.st_bytes = dbenv->mp_bytes; |
---|
271 | return (0); |
---|
272 | |
---|
273 | mem_err:__db_err(dbenv, "Unable to allocate memory for mpool region"); |
---|
274 | return (ret); |
---|
275 | } |
---|
276 | |
---|
277 | /* |
---|
278 | * __memp_dbenv_refresh -- |
---|
279 | * Clean up after the mpool system on a close or failed open. |
---|
280 | * |
---|
281 | * PUBLIC: int __memp_dbenv_refresh __P((DB_ENV *)); |
---|
282 | */ |
---|
283 | int |
---|
284 | __memp_dbenv_refresh(dbenv) |
---|
285 | DB_ENV *dbenv; |
---|
286 | { |
---|
287 | DB_MPOOL *dbmp; |
---|
288 | DB_MPOOLFILE *dbmfp; |
---|
289 | DB_MPREG *mpreg; |
---|
290 | u_int32_t i; |
---|
291 | int ret, t_ret; |
---|
292 | |
---|
293 | ret = 0; |
---|
294 | dbmp = dbenv->mp_handle; |
---|
295 | |
---|
296 | /* Discard DB_MPREGs. */ |
---|
297 | while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) { |
---|
298 | LIST_REMOVE(mpreg, q); |
---|
299 | __os_free(dbenv, mpreg); |
---|
300 | } |
---|
301 | |
---|
302 | /* Discard DB_MPOOLFILEs. */ |
---|
303 | while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL) |
---|
304 | if ((t_ret = __memp_fclose_int(dbmfp, 0)) != 0 && ret == 0) |
---|
305 | ret = t_ret; |
---|
306 | |
---|
307 | /* Discard the thread mutex. */ |
---|
308 | if (dbmp->mutexp != NULL) |
---|
309 | __db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp); |
---|
310 | |
---|
311 | /* Detach from the region(s). */ |
---|
312 | for (i = 0; i < dbmp->nreg; ++i) |
---|
313 | if ((t_ret = __db_r_detach( |
---|
314 | dbenv, &dbmp->reginfo[i], 0)) != 0 && ret == 0) |
---|
315 | ret = t_ret; |
---|
316 | |
---|
317 | __os_free(dbenv, dbmp->reginfo); |
---|
318 | __os_free(dbenv, dbmp); |
---|
319 | |
---|
320 | dbenv->mp_handle = NULL; |
---|
321 | return (ret); |
---|
322 | } |
---|
323 | |
---|
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
/*
 * __mpool_region_maint --
 *	Return the amount of space needed for region maintenance info.
 *
 *	The result is one roff_t slot per tracked mutex.
 */
static size_t
__mpool_region_maint(infop)
	REGINFO *infop;
{
	int mutex_cnt;

	/*
	 * One mutex per page the cache could possibly hold (the region size
	 * divided by the smallest page size, plus one), then an allowance of
	 * DB_MAX_HANDLES for the mpool mutex and the dbenv and db handles.
	 */
	mutex_cnt = (infop->rp->size / DB_MIN_PGSIZE) + 1;
	mutex_cnt += DB_MAX_HANDLES;

	return (sizeof(roff_t) * mutex_cnt);
}
#endif
---|
349 | |
---|
/*
 * __mpool_region_destroy
 *	Destroy any region maintenance info.
 *
 *	Finds the MPOOL structure at the region's primary offset and passes
 *	the REGMAINT area located by its maint_off field to
 *	__db_shlocks_destroy.  dbenv is not otherwise used.
 *
 * PUBLIC: void __mpool_region_destroy __P((DB_ENV *, REGINFO *));
 */
void
__mpool_region_destroy(dbenv, infop)
	DB_ENV *dbenv;
	REGINFO *infop;
{
	/*
	 * NOTE(review): maint_off is only assigned in __mpool_init when
	 * HAVE_MUTEX_SYSTEM_RESOURCES is defined.  Presumably
	 * __db_shlocks_destroy discards its arguments in other builds --
	 * confirm before moving this expression out of the call.
	 */
	__db_shlocks_destroy(infop, (REGMAINT *)R_ADDR(infop,
	    ((MPOOL *)R_ADDR(infop, infop->rp->primary))->maint_off));

	/* Presumably silences unused-argument warnings -- TODO confirm. */
	COMPQUIET(dbenv, NULL);
	COMPQUIET(infop, NULL);
}
---|
367 | |
---|
368 | /* |
---|
369 | * __memp_nameop |
---|
370 | * Remove or rename a file in the pool. |
---|
371 | * |
---|
372 | * PUBLIC: int __memp_nameop __P((DB_ENV *, |
---|
373 | * PUBLIC: u_int8_t *, const char *, const char *, const char *)); |
---|
374 | * |
---|
375 | * XXX |
---|
376 | * Undocumented interface: DB private. |
---|
377 | */ |
---|
378 | int |
---|
379 | __memp_nameop(dbenv, fileid, newname, fullold, fullnew) |
---|
380 | DB_ENV *dbenv; |
---|
381 | u_int8_t *fileid; |
---|
382 | const char *newname, *fullold, *fullnew; |
---|
383 | { |
---|
384 | DB_MPOOL *dbmp; |
---|
385 | MPOOL *mp; |
---|
386 | MPOOLFILE *mfp; |
---|
387 | roff_t newname_off; |
---|
388 | int locked, ret; |
---|
389 | void *p; |
---|
390 | |
---|
391 | locked = 0; |
---|
392 | dbmp = NULL; |
---|
393 | |
---|
394 | if (!MPOOL_ON(dbenv)) |
---|
395 | goto fsop; |
---|
396 | |
---|
397 | dbmp = dbenv->mp_handle; |
---|
398 | mp = dbmp->reginfo[0].primary; |
---|
399 | |
---|
400 | /* |
---|
401 | * Remove or rename a file that the mpool might know about. We assume |
---|
402 | * that the fop layer has the file locked for exclusive access, so we |
---|
403 | * don't worry about locking except for the mpool mutexes. Checkpoint |
---|
404 | * can happen at any time, independent of file locking, so we have to |
---|
405 | * do the actual unlink or rename system call to avoid any race. |
---|
406 | * |
---|
407 | * If this is a rename, allocate first, because we can't recursively |
---|
408 | * grab the region lock. |
---|
409 | */ |
---|
410 | if (newname == NULL) |
---|
411 | p = NULL; |
---|
412 | else { |
---|
413 | if ((ret = __memp_alloc(dbmp, dbmp->reginfo, |
---|
414 | NULL, strlen(newname) + 1, &newname_off, &p)) != 0) |
---|
415 | return (ret); |
---|
416 | memcpy(p, newname, strlen(newname) + 1); |
---|
417 | } |
---|
418 | |
---|
419 | locked = 1; |
---|
420 | R_LOCK(dbenv, dbmp->reginfo); |
---|
421 | |
---|
422 | /* |
---|
423 | * Find the file -- if mpool doesn't know about this file, that's not |
---|
424 | * an error-- we may not have it open. |
---|
425 | */ |
---|
426 | for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile); |
---|
427 | mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { |
---|
428 | /* Ignore non-active files. */ |
---|
429 | if (F_ISSET(mfp, MP_DEADFILE | MP_TEMP)) |
---|
430 | continue; |
---|
431 | |
---|
432 | /* Ignore non-matching files. */ |
---|
433 | if (memcmp(fileid, R_ADDR( |
---|
434 | dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN) != 0) |
---|
435 | continue; |
---|
436 | |
---|
437 | /* If newname is NULL, we're removing the file. */ |
---|
438 | if (newname == NULL) { |
---|
439 | MUTEX_LOCK(dbenv, &mfp->mutex); |
---|
440 | MPOOLFILE_IGNORE(mfp); |
---|
441 | MUTEX_UNLOCK(dbenv, &mfp->mutex); |
---|
442 | } else { |
---|
443 | /* |
---|
444 | * Else, it's a rename. We've allocated memory |
---|
445 | * for the new name. Swap it with the old one. |
---|
446 | */ |
---|
447 | p = R_ADDR(dbmp->reginfo, mfp->path_off); |
---|
448 | mfp->path_off = newname_off; |
---|
449 | } |
---|
450 | break; |
---|
451 | } |
---|
452 | |
---|
453 | /* Delete the memory we no longer need. */ |
---|
454 | if (p != NULL) |
---|
455 | __db_shalloc_free(dbmp->reginfo[0].addr, p); |
---|
456 | |
---|
457 | fsop: if (newname == NULL) |
---|
458 | (void)__os_unlink(dbenv, fullold); |
---|
459 | else |
---|
460 | (void)__os_rename(dbenv, fullold, fullnew, 1); |
---|
461 | |
---|
462 | if (locked) |
---|
463 | R_UNLOCK(dbenv, dbmp->reginfo); |
---|
464 | |
---|
465 | return (0); |
---|
466 | } |
---|