| /* |
| ** 2012-01-23 |
| ** |
| ** The author disclaims copyright to this source code. In place of |
| ** a legal notice, here is a blessing: |
| ** |
| ** May you do good and not evil. |
| ** May you find forgiveness for yourself and forgive others. |
| ** May you share freely, never taking more than you give. |
| ** |
| ************************************************************************* |
| ** |
| ** Utilities used to help multiple LSM clients to coexist within the |
| ** same process space. |
| */ |
| #include "lsmInt.h" |
| |
| /* |
| ** Global data. All global variables used by code in this file are grouped |
| ** into the following structure instance. |
| ** |
| ** pDatabase: |
| ** Linked list of all Database objects allocated within this process. |
| ** This list may not be traversed without holding the global mutex (see |
| ** functions enterGlobalMutex() and leaveGlobalMutex()). |
| */ |
| static struct SharedData { |
| Database *pDatabase; /* Linked list of all Database objects */ |
| } gShared; |
| |
| /* |
| ** Database structure. There is one such structure for each distinct |
| ** database accessed by this process. They are stored in the singly linked |
| ** list starting at global variable gShared.pDatabase. Database objects are |
| ** reference counted. Once the number of connections to the associated |
| ** database drops to zero, they are removed from the linked list and deleted. |
| ** |
| ** pFile: |
| ** In multi-process mode, this file descriptor is used to obtain locks |
| ** and to access shared-memory. In single process mode, its only job is |
| ** to hold the exclusive lock on the file. |
| ** |
| */ |
| struct Database { |
| /* Protected by the global mutex (enterGlobalMutex/leaveGlobalMutex): */ |
| char *zName; /* Canonical path to database file */ |
| int nName; /* strlen(zName) */ |
| int nDbRef; /* Number of associated lsm_db handles */ |
| Database *pDbNext; /* Next Database structure in global list */ |
| |
| /* Protected by the local mutex (pClientMutex) */ |
| int bReadonly; /* True if Database.pFile is read-only */ |
| int bMultiProc; /* True if running in multi-process mode */ |
| lsm_file *pFile; /* Used for locks/shm in multi-proc mode */ |
| LsmFile *pLsmFile; /* List of deferred closes */ |
| lsm_mutex *pClientMutex; /* Protects the apShmChunk[] and pConn */ |
| int nShmChunk; /* Number of entries in apShmChunk[] array */ |
| void **apShmChunk; /* Array of "shared" memory regions */ |
| lsm_db *pConn; /* List of connections to this db. */ |
| }; |
| |
| /* |
| ** Functions to enter and leave the global mutex. This mutex is used |
| ** to protect the global linked-list headed at gShared.pDatabase. |
| */ |
| static int enterGlobalMutex(lsm_env *pEnv){ |
| lsm_mutex *p; |
| int rc = lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p); |
| if( rc==LSM_OK ) lsmMutexEnter(pEnv, p); |
| return rc; |
| } |
| static void leaveGlobalMutex(lsm_env *pEnv){ |
| lsm_mutex *p; |
| lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p); |
| lsmMutexLeave(pEnv, p); |
| } |
| |
| #ifdef LSM_DEBUG |
| static int holdingGlobalMutex(lsm_env *pEnv){ |
| lsm_mutex *p; |
| lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p); |
| return lsmMutexHeld(pEnv, p); |
| } |
| #endif |
| |
| #if 0 |
| static void assertNotInFreelist(Freelist *p, int iBlk){ |
| int i; |
| for(i=0; i<p->nEntry; i++){ |
| assert( p->aEntry[i].iBlk!=iBlk ); |
| } |
| } |
| #else |
| # define assertNotInFreelist(x,y) |
| #endif |
| |
| /* |
| ** Append an entry to the free-list. If (iId==-1), this is a delete. |
| */ |
| int freelistAppend(lsm_db *db, u32 iBlk, i64 iId){ |
| lsm_env *pEnv = db->pEnv; |
| Freelist *p; |
| int i; |
| |
| assert( iId==-1 || iId>=0 ); |
| p = db->bUseFreelist ? db->pFreelist : &db->pWorker->freelist; |
| |
| /* Extend the space allocated for the freelist, if required */ |
| assert( p->nAlloc>=p->nEntry ); |
| if( p->nAlloc==p->nEntry ){ |
| int nNew; |
| int nByte; |
| FreelistEntry *aNew; |
| |
| nNew = (p->nAlloc==0 ? 4 : p->nAlloc*2); |
| nByte = sizeof(FreelistEntry) * nNew; |
| aNew = (FreelistEntry *)lsmRealloc(pEnv, p->aEntry, nByte); |
| if( !aNew ) return LSM_NOMEM_BKPT; |
| p->nAlloc = nNew; |
| p->aEntry = aNew; |
| } |
| |
| for(i=0; i<p->nEntry; i++){ |
| assert( i==0 || p->aEntry[i].iBlk > p->aEntry[i-1].iBlk ); |
| if( p->aEntry[i].iBlk>=iBlk ) break; |
| } |
| |
| if( i<p->nEntry && p->aEntry[i].iBlk==iBlk ){ |
| /* Clobber an existing entry */ |
| p->aEntry[i].iId = iId; |
| }else{ |
| /* Insert a new entry into the list */ |
| int nByte = sizeof(FreelistEntry)*(p->nEntry-i); |
| memmove(&p->aEntry[i+1], &p->aEntry[i], nByte); |
| p->aEntry[i].iBlk = iBlk; |
| p->aEntry[i].iId = iId; |
| p->nEntry++; |
| } |
| |
| return LSM_OK; |
| } |
| |
| /* |
| ** This function frees all resources held by the Database structure passed |
| ** as the only argument. |
| */ |
| static void freeDatabase(lsm_env *pEnv, Database *p){ |
| assert( holdingGlobalMutex(pEnv) ); |
| if( p ){ |
| /* Free the mutexes */ |
| lsmMutexDel(pEnv, p->pClientMutex); |
| |
| if( p->pFile ){ |
| lsmEnvClose(pEnv, p->pFile); |
| } |
| |
| /* Free the array of shm pointers */ |
| lsmFree(pEnv, p->apShmChunk); |
| |
| /* Free the memory allocated for the Database struct itself */ |
| lsmFree(pEnv, p); |
| } |
| } |
| |
| typedef struct DbTruncateCtx DbTruncateCtx; |
| struct DbTruncateCtx { |
| int nBlock; |
| i64 iInUse; |
| }; |
| |
| static int dbTruncateCb(void *pCtx, int iBlk, i64 iSnapshot){ |
| DbTruncateCtx *p = (DbTruncateCtx *)pCtx; |
| if( iBlk!=p->nBlock || (p->iInUse>=0 && iSnapshot>=p->iInUse) ) return 1; |
| p->nBlock--; |
| return 0; |
| } |
| |
| static int dbTruncate(lsm_db *pDb, i64 iInUse){ |
| int rc = LSM_OK; |
| #if 0 |
| int i; |
| DbTruncateCtx ctx; |
| |
| assert( pDb->pWorker ); |
| ctx.nBlock = pDb->pWorker->nBlock; |
| ctx.iInUse = iInUse; |
| |
| rc = lsmWalkFreelist(pDb, 1, dbTruncateCb, (void *)&ctx); |
| for(i=ctx.nBlock+1; rc==LSM_OK && i<=pDb->pWorker->nBlock; i++){ |
| rc = freelistAppend(pDb, i, -1); |
| } |
| |
| if( rc==LSM_OK ){ |
| #ifdef LSM_LOG_FREELIST |
| if( ctx.nBlock!=pDb->pWorker->nBlock ){ |
| lsmLogMessage(pDb, 0, |
| "dbTruncate(): truncated db to %d blocks",ctx.nBlock |
| ); |
| } |
| #endif |
| pDb->pWorker->nBlock = ctx.nBlock; |
| } |
| #endif |
| return rc; |
| } |
| |
| |
| /* |
| ** This function is called during database shutdown (when the number of |
| ** connections drops from one to zero). It truncates the database file |
| ** to as small a size as possible without truncating away any blocks that |
| ** contain data. |
| */ |
| static int dbTruncateFile(lsm_db *pDb){ |
| int rc; |
| |
| assert( pDb->pWorker==0 ); |
| assert( lsmShmAssertLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL) ); |
| rc = lsmCheckpointLoadWorker(pDb); |
| |
| if( rc==LSM_OK ){ |
| DbTruncateCtx ctx; |
| |
| /* Walk the database free-block-list in reverse order. Set ctx.nBlock |
| ** to the block number of the last block in the database that actually |
| ** contains data. */ |
| ctx.nBlock = pDb->pWorker->nBlock; |
| ctx.iInUse = -1; |
| rc = lsmWalkFreelist(pDb, 1, dbTruncateCb, (void *)&ctx); |
| |
| /* If the last block that contains data is not already the last block in |
| ** the database file, truncate the database file so that it is. */ |
| if( rc==LSM_OK ){ |
| rc = lsmFsTruncateDb( |
| pDb->pFS, (i64)ctx.nBlock*lsmFsBlockSize(pDb->pFS) |
| ); |
| } |
| } |
| |
| lsmFreeSnapshot(pDb->pEnv, pDb->pWorker); |
| pDb->pWorker = 0; |
| return rc; |
| } |
| |
| static void doDbDisconnect(lsm_db *pDb){ |
| int rc; |
| |
| if( pDb->bReadonly ){ |
| lsmShmLock(pDb, LSM_LOCK_DMS3, LSM_LOCK_UNLOCK, 0); |
| }else{ |
| /* Block for an exclusive lock on DMS1. This lock serializes all calls |
| ** to doDbConnect() and doDbDisconnect() across all processes. */ |
| rc = lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL, 1); |
| if( rc==LSM_OK ){ |
| |
| lsmShmLock(pDb, LSM_LOCK_DMS2, LSM_LOCK_UNLOCK, 0); |
| |
| /* Try an exclusive lock on DMS2. If successful, this is the last |
| ** connection to the database. In this case flush the contents of the |
| ** in-memory tree to disk and write a checkpoint. */ |
| rc = lsmShmTestLock(pDb, LSM_LOCK_DMS2, 1, LSM_LOCK_EXCL); |
| if( rc==LSM_OK ){ |
| rc = lsmShmTestLock(pDb, LSM_LOCK_CHECKPOINTER, 1, LSM_LOCK_EXCL); |
| } |
| if( rc==LSM_OK ){ |
| int bReadonly = 0; /* True if there exist read-only conns. */ |
| |
| /* Flush the in-memory tree, if required. If there is data to flush, |
| ** this will create a new client snapshot in Database.pClient. The |
| ** checkpoint (serialization) of this snapshot may be written to disk |
| ** by the following block. |
| ** |
| ** There is no need to take a WRITER lock here. That there are no |
| ** other locks on DMS2 guarantees that there are no other read-write |
| ** connections at this time (and the lock on DMS1 guarantees that |
| ** no new ones may appear). |
| */ |
| rc = lsmTreeLoadHeader(pDb, 0); |
| if( rc==LSM_OK && (lsmTreeHasOld(pDb) || lsmTreeSize(pDb)>0) ){ |
| rc = lsmFlushTreeToDisk(pDb); |
| } |
| |
| /* Now check if there are any read-only connections. If there are, |
| ** then do not truncate the db file or unlink the shared-memory |
| ** region. */ |
| if( rc==LSM_OK ){ |
| rc = lsmShmTestLock(pDb, LSM_LOCK_DMS3, 1, LSM_LOCK_EXCL); |
| if( rc==LSM_BUSY ){ |
| bReadonly = 1; |
| rc = LSM_OK; |
| } |
| } |
| |
| /* Write a checkpoint to disk. */ |
| if( rc==LSM_OK ){ |
| rc = lsmCheckpointWrite(pDb, 0); |
| } |
| |
| /* If the checkpoint was written successfully, delete the log file |
| ** and, if possible, truncate the database file. */ |
| if( rc==LSM_OK ){ |
| int bRotrans = 0; |
| Database *p = pDb->pDatabase; |
| |
| /* The log file may only be deleted if there are no clients |
| ** read-only clients running rotrans transactions. */ |
| rc = lsmDetectRoTrans(pDb, &bRotrans); |
| if( rc==LSM_OK && bRotrans==0 ){ |
| lsmFsCloseAndDeleteLog(pDb->pFS); |
| } |
| |
| /* The database may only be truncated if there exist no read-only |
| ** clients - either connected or running rotrans transactions. */ |
| if( bReadonly==0 && bRotrans==0 ){ |
| lsmFsUnmap(pDb->pFS); |
| dbTruncateFile(pDb); |
| if( p->pFile && p->bMultiProc ){ |
| lsmEnvShmUnmap(pDb->pEnv, p->pFile, 1); |
| } |
| } |
| } |
| } |
| } |
| |
| if( pDb->iRwclient>=0 ){ |
| lsmShmLock(pDb, LSM_LOCK_RWCLIENT(pDb->iRwclient), LSM_LOCK_UNLOCK, 0); |
| pDb->iRwclient = -1; |
| } |
| |
| lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0); |
| } |
| pDb->pShmhdr = 0; |
| } |
| |
| static int doDbConnect(lsm_db *pDb){ |
| const int nUsMax = 100000; /* Max value for nUs */ |
| int nUs = 1000; /* us to wait between DMS1 attempts */ |
| int rc; |
| |
| /* Obtain a pointer to the shared-memory header */ |
| assert( pDb->pShmhdr==0 ); |
| assert( pDb->bReadonly==0 ); |
| |
| /* Block for an exclusive lock on DMS1. This lock serializes all calls |
| ** to doDbConnect() and doDbDisconnect() across all processes. */ |
| while( 1 ){ |
| rc = lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL, 1); |
| if( rc!=LSM_BUSY ) break; |
| lsmEnvSleep(pDb->pEnv, nUs); |
| nUs = nUs * 2; |
| if( nUs>nUsMax ) nUs = nUsMax; |
| } |
| if( rc==LSM_OK ){ |
| rc = lsmShmCacheChunks(pDb, 1); |
| } |
| if( rc!=LSM_OK ) return rc; |
| pDb->pShmhdr = (ShmHeader *)pDb->apShm[0]; |
| |
| /* Try an exclusive lock on DMS2/DMS3. If successful, this is the first |
| ** and only connection to the database. In this case initialize the |
| ** shared-memory and run log file recovery. */ |
| assert( LSM_LOCK_DMS3==1+LSM_LOCK_DMS2 ); |
| rc = lsmShmTestLock(pDb, LSM_LOCK_DMS2, 2, LSM_LOCK_EXCL); |
| if( rc==LSM_OK ){ |
| memset(pDb->pShmhdr, 0, sizeof(ShmHeader)); |
| rc = lsmCheckpointRecover(pDb); |
| if( rc==LSM_OK ){ |
| rc = lsmLogRecover(pDb); |
| } |
| if( rc==LSM_OK ){ |
| ShmHeader *pShm = pDb->pShmhdr; |
| pShm->aReader[0].iLsmId = lsmCheckpointId(pShm->aSnap1, 0); |
| pShm->aReader[0].iTreeId = pDb->treehdr.iUsedShmid; |
| } |
| }else if( rc==LSM_BUSY ){ |
| rc = LSM_OK; |
| } |
| |
| /* Take a shared lock on DMS2. In multi-process mode this lock "cannot" |
| ** fail, as connections may only hold an exclusive lock on DMS2 if they |
| ** first hold an exclusive lock on DMS1. And this connection is currently |
| ** holding the exclusive lock on DSM1. |
| ** |
| ** However, if some other connection has the database open in single-process |
| ** mode, this operation will fail. In this case, return the error to the |
| ** caller - the attempt to connect to the db has failed. |
| */ |
| if( rc==LSM_OK ){ |
| rc = lsmShmLock(pDb, LSM_LOCK_DMS2, LSM_LOCK_SHARED, 0); |
| } |
| |
| /* If anything went wrong, unlock DMS2. Otherwise, try to take an exclusive |
| ** lock on one of the LSM_LOCK_RWCLIENT() locks. Unlock DMS1 in any case. */ |
| if( rc!=LSM_OK ){ |
| pDb->pShmhdr = 0; |
| }else{ |
| int i; |
| for(i=0; i<LSM_LOCK_NRWCLIENT; i++){ |
| int rc2 = lsmShmLock(pDb, LSM_LOCK_RWCLIENT(i), LSM_LOCK_EXCL, 0); |
| if( rc2==LSM_OK ) pDb->iRwclient = i; |
| if( rc2!=LSM_BUSY ){ |
| rc = rc2; |
| break; |
| } |
| } |
| } |
| lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0); |
| |
| return rc; |
| } |
| |
| static int dbOpenSharedFd(lsm_env *pEnv, Database *p, int bRoOk){ |
| int rc; |
| |
| rc = lsmEnvOpen(pEnv, p->zName, 0, &p->pFile); |
| if( rc==LSM_IOERR && bRoOk ){ |
| rc = lsmEnvOpen(pEnv, p->zName, LSM_OPEN_READONLY, &p->pFile); |
| p->bReadonly = 1; |
| } |
| |
| return rc; |
| } |
| |
| /* |
| ** Return a reference to the shared Database handle for the database |
| ** identified by canonical path zName. If this is the first connection to |
| ** the named database, a new Database object is allocated. Otherwise, a |
| ** pointer to an existing object is returned. |
| ** |
| ** If successful, *ppDatabase is set to point to the shared Database |
| ** structure and LSM_OK returned. Otherwise, *ppDatabase is set to NULL |
| ** and and LSM error code returned. |
| ** |
| ** Each successful call to this function should be (eventually) matched |
| ** by a call to lsmDbDatabaseRelease(). |
| */ |
| int lsmDbDatabaseConnect( |
| lsm_db *pDb, /* Database handle */ |
| const char *zName /* Full-path to db file */ |
| ){ |
| lsm_env *pEnv = pDb->pEnv; |
| int rc; /* Return code */ |
| Database *p = 0; /* Pointer returned via *ppDatabase */ |
| int nName = lsmStrlen(zName); |
| |
| assert( pDb->pDatabase==0 ); |
| rc = enterGlobalMutex(pEnv); |
| if( rc==LSM_OK ){ |
| |
| /* Search the global list for an existing object. TODO: Need something |
| ** better than the memcmp() below to figure out if a given Database |
| ** object represents the requested file. */ |
| for(p=gShared.pDatabase; p; p=p->pDbNext){ |
| if( nName==p->nName && 0==memcmp(zName, p->zName, nName) ) break; |
| } |
| |
| /* If no suitable Database object was found, allocate a new one. */ |
| if( p==0 ){ |
| p = (Database *)lsmMallocZeroRc(pEnv, sizeof(Database)+nName+1, &rc); |
| |
| /* If the allocation was successful, fill in other fields and |
| ** allocate the client mutex. */ |
| if( rc==LSM_OK ){ |
| p->bMultiProc = pDb->bMultiProc; |
| p->zName = (char *)&p[1]; |
| p->nName = nName; |
| memcpy((void *)p->zName, zName, nName+1); |
| rc = lsmMutexNew(pEnv, &p->pClientMutex); |
| } |
| |
| /* If nothing has gone wrong so far, open the shared fd. And if that |
| ** succeeds and this connection requested single-process mode, |
| ** attempt to take the exclusive lock on DMS2. */ |
| if( rc==LSM_OK ){ |
| int bReadonly = (pDb->bReadonly && pDb->bMultiProc); |
| rc = dbOpenSharedFd(pDb->pEnv, p, bReadonly); |
| } |
| |
| if( rc==LSM_OK && p->bMultiProc==0 ){ |
| /* Hold an exclusive lock DMS1 while grabbing DMS2. This ensures |
| ** that any ongoing call to doDbDisconnect() (even one in another |
| ** process) is finished before proceeding. */ |
| assert( p->bReadonly==0 ); |
| rc = lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS1, LSM_LOCK_EXCL); |
| if( rc==LSM_OK ){ |
| rc = lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS2, LSM_LOCK_EXCL); |
| lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK); |
| } |
| } |
| |
| if( rc==LSM_OK ){ |
| p->pDbNext = gShared.pDatabase; |
| gShared.pDatabase = p; |
| }else{ |
| freeDatabase(pEnv, p); |
| p = 0; |
| } |
| } |
| |
| if( p ){ |
| p->nDbRef++; |
| } |
| leaveGlobalMutex(pEnv); |
| |
| if( p ){ |
| lsmMutexEnter(pDb->pEnv, p->pClientMutex); |
| pDb->pNext = p->pConn; |
| p->pConn = pDb; |
| lsmMutexLeave(pDb->pEnv, p->pClientMutex); |
| } |
| } |
| |
| pDb->pDatabase = p; |
| if( rc==LSM_OK ){ |
| assert( p ); |
| rc = lsmFsOpen(pDb, zName, p->bReadonly); |
| } |
| |
| /* If the db handle is read-write, then connect to the system now. Run |
| ** recovery as necessary. Or, if this is a read-only database handle, |
| ** defer attempting to connect to the system until a read-transaction |
| ** is opened. */ |
| if( rc==LSM_OK ){ |
| rc = lsmFsConfigure(pDb); |
| } |
| if( rc==LSM_OK && pDb->bReadonly==0 ){ |
| rc = doDbConnect(pDb); |
| } |
| |
| return rc; |
| } |
| |
| static void dbDeferClose(lsm_db *pDb){ |
| if( pDb->pFS ){ |
| LsmFile *pLsmFile; |
| Database *p = pDb->pDatabase; |
| pLsmFile = lsmFsDeferClose(pDb->pFS); |
| pLsmFile->pNext = p->pLsmFile; |
| p->pLsmFile = pLsmFile; |
| } |
| } |
| |
| LsmFile *lsmDbRecycleFd(lsm_db *db){ |
| LsmFile *pRet; |
| Database *p = db->pDatabase; |
| lsmMutexEnter(db->pEnv, p->pClientMutex); |
| if( (pRet = p->pLsmFile)!=0 ){ |
| p->pLsmFile = pRet->pNext; |
| } |
| lsmMutexLeave(db->pEnv, p->pClientMutex); |
| return pRet; |
| } |
| |
| /* |
| ** Release a reference to a Database object obtained from |
| ** lsmDbDatabaseConnect(). There should be exactly one call to this function |
| ** for each successful call to Find(). |
| */ |
| void lsmDbDatabaseRelease(lsm_db *pDb){ |
| Database *p = pDb->pDatabase; |
| if( p ){ |
| lsm_db **ppDb; |
| |
| if( pDb->pShmhdr ){ |
| doDbDisconnect(pDb); |
| } |
| |
| lsmFsUnmap(pDb->pFS); |
| lsmMutexEnter(pDb->pEnv, p->pClientMutex); |
| for(ppDb=&p->pConn; *ppDb!=pDb; ppDb=&((*ppDb)->pNext)); |
| *ppDb = pDb->pNext; |
| dbDeferClose(pDb); |
| lsmMutexLeave(pDb->pEnv, p->pClientMutex); |
| |
| enterGlobalMutex(pDb->pEnv); |
| p->nDbRef--; |
| if( p->nDbRef==0 ){ |
| LsmFile *pIter; |
| LsmFile *pNext; |
| Database **pp; |
| |
| /* Remove the Database structure from the linked list. */ |
| for(pp=&gShared.pDatabase; *pp!=p; pp=&((*pp)->pDbNext)); |
| *pp = p->pDbNext; |
| |
| /* If they were allocated from the heap, free the shared memory chunks */ |
| if( p->bMultiProc==0 ){ |
| int i; |
| for(i=0; i<p->nShmChunk; i++){ |
| lsmFree(pDb->pEnv, p->apShmChunk[i]); |
| } |
| } |
| |
| /* Close any outstanding file descriptors */ |
| for(pIter=p->pLsmFile; pIter; pIter=pNext){ |
| pNext = pIter->pNext; |
| lsmEnvClose(pDb->pEnv, pIter->pFile); |
| lsmFree(pDb->pEnv, pIter); |
| } |
| freeDatabase(pDb->pEnv, p); |
| } |
| leaveGlobalMutex(pDb->pEnv); |
| } |
| } |
| |
| Level *lsmDbSnapshotLevel(Snapshot *pSnapshot){ |
| return pSnapshot->pLevel; |
| } |
| |
| void lsmDbSnapshotSetLevel(Snapshot *pSnap, Level *pLevel){ |
| pSnap->pLevel = pLevel; |
| } |
| |
| /* TODO: Shuffle things around to get rid of this */ |
| static int firstSnapshotInUse(lsm_db *, i64 *); |
| |
| /* |
| ** Context object used by the lsmWalkFreelist() utility. |
| */ |
| typedef struct WalkFreelistCtx WalkFreelistCtx; |
| struct WalkFreelistCtx { |
| lsm_db *pDb; |
| int bReverse; |
| Freelist *pFreelist; |
| int iFree; |
| int (*xUsr)(void *, int, i64); /* User callback function */ |
| void *pUsrctx; /* User callback context */ |
| int bDone; /* Set to true after xUsr() returns true */ |
| }; |
| |
| /* |
| ** Callback used by lsmWalkFreelist(). |
| */ |
| static int walkFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){ |
| WalkFreelistCtx *p = (WalkFreelistCtx *)pCtx; |
| const int iDir = (p->bReverse ? -1 : 1); |
| Freelist *pFree = p->pFreelist; |
| |
| assert( p->bDone==0 ); |
| assert( iBlk>=0 ); |
| if( pFree ){ |
| while( (p->iFree < pFree->nEntry) && p->iFree>=0 ){ |
| FreelistEntry *pEntry = &pFree->aEntry[p->iFree]; |
| if( (p->bReverse==0 && pEntry->iBlk>(u32)iBlk) |
| || (p->bReverse!=0 && pEntry->iBlk<(u32)iBlk) |
| ){ |
| break; |
| }else{ |
| p->iFree += iDir; |
| if( pEntry->iId>=0 |
| && p->xUsr(p->pUsrctx, pEntry->iBlk, pEntry->iId) |
| ){ |
| p->bDone = 1; |
| return 1; |
| } |
| if( pEntry->iBlk==(u32)iBlk ) return 0; |
| } |
| } |
| } |
| |
| if( p->xUsr(p->pUsrctx, iBlk, iSnapshot) ){ |
| p->bDone = 1; |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| ** The database handle passed as the first argument must be the worker |
| ** connection. This function iterates through the contents of the current |
| ** free block list, invoking the supplied callback once for each list |
| ** element. |
| ** |
| ** The difference between this function and lsmSortedWalkFreelist() is |
| ** that lsmSortedWalkFreelist() only considers those free-list elements |
| ** stored within the LSM. This function also merges in any in-memory |
| ** elements. |
| */ |
| int lsmWalkFreelist( |
| lsm_db *pDb, /* Database handle (must be worker) */ |
| int bReverse, /* True to iterate from largest to smallest */ |
| int (*x)(void *, int, i64), /* Callback function */ |
| void *pCtx /* First argument to pass to callback */ |
| ){ |
| const int iDir = (bReverse ? -1 : 1); |
| int rc; |
| int iCtx; |
| |
| WalkFreelistCtx ctx[2]; |
| |
| ctx[0].pDb = pDb; |
| ctx[0].bReverse = bReverse; |
| ctx[0].pFreelist = &pDb->pWorker->freelist; |
| if( ctx[0].pFreelist && bReverse ){ |
| ctx[0].iFree = ctx[0].pFreelist->nEntry-1; |
| }else{ |
| ctx[0].iFree = 0; |
| } |
| ctx[0].xUsr = walkFreelistCb; |
| ctx[0].pUsrctx = (void *)&ctx[1]; |
| ctx[0].bDone = 0; |
| |
| ctx[1].pDb = pDb; |
| ctx[1].bReverse = bReverse; |
| ctx[1].pFreelist = pDb->pFreelist; |
| if( ctx[1].pFreelist && bReverse ){ |
| ctx[1].iFree = ctx[1].pFreelist->nEntry-1; |
| }else{ |
| ctx[1].iFree = 0; |
| } |
| ctx[1].xUsr = x; |
| ctx[1].pUsrctx = pCtx; |
| ctx[1].bDone = 0; |
| |
| rc = lsmSortedWalkFreelist(pDb, bReverse, walkFreelistCb, (void *)&ctx[0]); |
| |
| if( ctx[0].bDone==0 ){ |
| for(iCtx=0; iCtx<2; iCtx++){ |
| int i; |
| WalkFreelistCtx *p = &ctx[iCtx]; |
| for(i=p->iFree; |
| p->pFreelist && rc==LSM_OK && i<p->pFreelist->nEntry && i>=0; |
| i += iDir |
| ){ |
| FreelistEntry *pEntry = &p->pFreelist->aEntry[i]; |
| if( pEntry->iId>=0 && p->xUsr(p->pUsrctx, pEntry->iBlk, pEntry->iId) ){ |
| return LSM_OK; |
| } |
| } |
| } |
| } |
| |
| return rc; |
| } |
| |
| |
| typedef struct FindFreeblockCtx FindFreeblockCtx; |
| struct FindFreeblockCtx { |
| i64 iInUse; |
| int iRet; |
| int bNotOne; |
| }; |
| |
| static int findFreeblockCb(void *pCtx, int iBlk, i64 iSnapshot){ |
| FindFreeblockCtx *p = (FindFreeblockCtx *)pCtx; |
| if( iSnapshot<p->iInUse && (iBlk!=1 || p->bNotOne==0) ){ |
| p->iRet = iBlk; |
| return 1; |
| } |
| return 0; |
| } |
| |
| static int findFreeblock(lsm_db *pDb, i64 iInUse, int bNotOne, int *piRet){ |
| int rc; /* Return code */ |
| FindFreeblockCtx ctx; /* Context object */ |
| |
| ctx.iInUse = iInUse; |
| ctx.iRet = 0; |
| ctx.bNotOne = bNotOne; |
| rc = lsmWalkFreelist(pDb, 0, findFreeblockCb, (void *)&ctx); |
| *piRet = ctx.iRet; |
| |
| return rc; |
| } |
| |
| /* |
| ** Allocate a new database file block to write data to, either by extending |
| ** the database file or by recycling a free-list entry. The worker snapshot |
| ** must be held in order to call this function. |
| ** |
| ** If successful, *piBlk is set to the block number allocated and LSM_OK is |
| ** returned. Otherwise, *piBlk is zeroed and an lsm error code returned. |
| */ |
| int lsmBlockAllocate(lsm_db *pDb, int iBefore, int *piBlk){ |
| Snapshot *p = pDb->pWorker; |
| int iRet = 0; /* Block number of allocated block */ |
| int rc = LSM_OK; |
| i64 iInUse = 0; /* Snapshot id still in use */ |
| i64 iSynced = 0; /* Snapshot id synced to disk */ |
| |
| assert( p ); |
| |
| #ifdef LSM_LOG_FREELIST |
| { |
| static int nCall = 0; |
| char *zFree = 0; |
| nCall++; |
| rc = lsmInfoFreelist(pDb, &zFree); |
| if( rc!=LSM_OK ) return rc; |
| lsmLogMessage(pDb, 0, "lsmBlockAllocate(): %d freelist: %s", nCall, zFree); |
| lsmFree(pDb->pEnv, zFree); |
| } |
| #endif |
| |
| /* Set iInUse to the smallest snapshot id that is either: |
| ** |
| ** * Currently in use by a database client, |
| ** * May be used by a database client in the future, or |
| ** * Is the most recently checkpointed snapshot (i.e. the one that will |
| ** be used following recovery if a failure occurs at this point). |
| */ |
| rc = lsmCheckpointSynced(pDb, &iSynced, 0, 0); |
| if( rc==LSM_OK && iSynced==0 ) iSynced = p->iId; |
| iInUse = iSynced; |
| if( rc==LSM_OK && pDb->iReader>=0 ){ |
| assert( pDb->pClient ); |
| iInUse = LSM_MIN(iInUse, pDb->pClient->iId); |
| } |
| if( rc==LSM_OK ) rc = firstSnapshotInUse(pDb, &iInUse); |
| |
| #ifdef LSM_LOG_FREELIST |
| { |
| lsmLogMessage(pDb, 0, "lsmBlockAllocate(): " |
| "snapshot-in-use: %lld (iSynced=%lld) (client-id=%lld)", |
| iInUse, iSynced, (pDb->iReader>=0 ? pDb->pClient->iId : 0) |
| ); |
| } |
| #endif |
| |
| |
| /* Unless there exists a read-only transaction (which prevents us from |
| ** recycling any blocks regardless, query the free block list for a |
| ** suitable block to reuse. |
| ** |
| ** It might seem more natural to check for a read-only transaction at |
| ** the start of this function. However, it is better do wait until after |
| ** the call to lsmCheckpointSynced() to do so. |
| */ |
| if( rc==LSM_OK ){ |
| int bRotrans; |
| rc = lsmDetectRoTrans(pDb, &bRotrans); |
| |
| if( rc==LSM_OK && bRotrans==0 ){ |
| rc = findFreeblock(pDb, iInUse, (iBefore>0), &iRet); |
| } |
| } |
| |
| if( iBefore>0 && (iRet<=0 || iRet>=iBefore) ){ |
| iRet = 0; |
| |
| }else if( rc==LSM_OK ){ |
| /* If a block was found in the free block list, use it and remove it from |
| ** the list. Otherwise, if no suitable block was found, allocate one from |
| ** the end of the file. */ |
| if( iRet>0 ){ |
| #ifdef LSM_LOG_FREELIST |
| lsmLogMessage(pDb, 0, |
| "reusing block %d (snapshot-in-use=%lld)", iRet, iInUse); |
| #endif |
| rc = freelistAppend(pDb, iRet, -1); |
| if( rc==LSM_OK ){ |
| rc = dbTruncate(pDb, iInUse); |
| } |
| }else{ |
| iRet = ++(p->nBlock); |
| #ifdef LSM_LOG_FREELIST |
| lsmLogMessage(pDb, 0, "extending file to %d blocks", iRet); |
| #endif |
| } |
| } |
| |
| assert( iBefore>0 || iRet>0 || rc!=LSM_OK ); |
| *piBlk = iRet; |
| return rc; |
| } |
| |
| /* |
| ** Free a database block. The worker snapshot must be held in order to call |
| ** this function. |
| ** |
| ** If successful, LSM_OK is returned. Otherwise, an lsm error code (e.g. |
| ** LSM_NOMEM). |
| */ |
| int lsmBlockFree(lsm_db *pDb, int iBlk){ |
| Snapshot *p = pDb->pWorker; |
| assert( lsmShmAssertWorker(pDb) ); |
| |
| #ifdef LSM_LOG_FREELIST |
| lsmLogMessage(pDb, LSM_OK, "lsmBlockFree(): Free block %d", iBlk); |
| #endif |
| |
| return freelistAppend(pDb, iBlk, p->iId); |
| } |
| |
| /* |
| ** Refree a database block. The worker snapshot must be held in order to call |
| ** this function. |
| ** |
| ** Refreeing is required when a block is allocated using lsmBlockAllocate() |
| ** but then not used. This function is used to push the block back onto |
| ** the freelist. Refreeing a block is different from freeing is, as a refreed |
| ** block may be reused immediately. Whereas a freed block can not be reused |
| ** until (at least) after the next checkpoint. |
| */ |
| int lsmBlockRefree(lsm_db *pDb, int iBlk){ |
| int rc = LSM_OK; /* Return code */ |
| |
| #ifdef LSM_LOG_FREELIST |
| lsmLogMessage(pDb, LSM_OK, "lsmBlockRefree(): Refree block %d", iBlk); |
| #endif |
| |
| rc = freelistAppend(pDb, iBlk, 0); |
| return rc; |
| } |
| |
| /* |
| ** If required, copy a database checkpoint from shared memory into the |
| ** database itself. |
| ** |
| ** The WORKER lock must not be held when this is called. This is because |
| ** this function may indirectly call fsync(). And the WORKER lock should |
| ** not be held that long (in case it is required by a client flushing an |
| ** in-memory tree to disk). |
| */ |
| int lsmCheckpointWrite(lsm_db *pDb, u32 *pnWrite){ |
| int rc; /* Return Code */ |
| u32 nWrite = 0; |
| |
| assert( pDb->pWorker==0 ); |
| assert( 1 || pDb->pClient==0 ); |
| assert( lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK) ); |
| |
| rc = lsmShmLock(pDb, LSM_LOCK_CHECKPOINTER, LSM_LOCK_EXCL, 0); |
| if( rc!=LSM_OK ) return rc; |
| |
| rc = lsmCheckpointLoad(pDb, 0); |
| if( rc==LSM_OK ){ |
| int nBlock = lsmCheckpointNBlock(pDb->aSnapshot); |
| ShmHeader *pShm = pDb->pShmhdr; |
| int bDone = 0; /* True if checkpoint is already stored */ |
| |
| /* Check if this checkpoint has already been written to the database |
| ** file. If so, set variable bDone to true. */ |
| if( pShm->iMetaPage ){ |
| MetaPage *pPg; /* Meta page */ |
| u8 *aData; /* Meta-page data buffer */ |
| int nData; /* Size of aData[] in bytes */ |
| i64 iCkpt; /* Id of checkpoint just loaded */ |
| i64 iDisk = 0; /* Id of checkpoint already stored in db */ |
| iCkpt = lsmCheckpointId(pDb->aSnapshot, 0); |
| rc = lsmFsMetaPageGet(pDb->pFS, 0, pShm->iMetaPage, &pPg); |
| if( rc==LSM_OK ){ |
| aData = lsmFsMetaPageData(pPg, &nData); |
| iDisk = lsmCheckpointId((u32 *)aData, 1); |
| nWrite = lsmCheckpointNWrite((u32 *)aData, 1); |
| lsmFsMetaPageRelease(pPg); |
| } |
| bDone = (iDisk>=iCkpt); |
| } |
| |
| if( rc==LSM_OK && bDone==0 ){ |
| int iMeta = (pShm->iMetaPage % 2) + 1; |
| if( pDb->eSafety!=LSM_SAFETY_OFF ){ |
| rc = lsmFsSyncDb(pDb->pFS, nBlock); |
| } |
| if( rc==LSM_OK ) rc = lsmCheckpointStore(pDb, iMeta); |
| if( rc==LSM_OK && pDb->eSafety!=LSM_SAFETY_OFF){ |
| rc = lsmFsSyncDb(pDb->pFS, 0); |
| } |
| if( rc==LSM_OK ){ |
| pShm->iMetaPage = iMeta; |
| nWrite = lsmCheckpointNWrite(pDb->aSnapshot, 0) - nWrite; |
| } |
| #ifdef LSM_LOG_WORK |
| lsmLogMessage(pDb, 0, "finish checkpoint %d", |
| (int)lsmCheckpointId(pDb->aSnapshot, 0) |
| ); |
| #endif |
| } |
| } |
| |
| lsmShmLock(pDb, LSM_LOCK_CHECKPOINTER, LSM_LOCK_UNLOCK, 0); |
| if( pnWrite && rc==LSM_OK ) *pnWrite = nWrite; |
| return rc; |
| } |
| |
| int lsmBeginWork(lsm_db *pDb){ |
| int rc; |
| |
| /* Attempt to take the WORKER lock */ |
| rc = lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL, 0); |
| |
| /* Deserialize the current worker snapshot */ |
| if( rc==LSM_OK ){ |
| rc = lsmCheckpointLoadWorker(pDb); |
| } |
| return rc; |
| } |
| |
| void lsmFreeSnapshot(lsm_env *pEnv, Snapshot *p){ |
| if( p ){ |
| lsmSortedFreeLevel(pEnv, p->pLevel); |
| lsmFree(pEnv, p->freelist.aEntry); |
| lsmFree(pEnv, p->redirect.a); |
| lsmFree(pEnv, p); |
| } |
| } |
| |
| /* |
| ** Attempt to populate one of the read-lock slots to contain lock values |
| ** iLsm/iShm. Or, if such a slot exists already, this function is a no-op. |
| ** |
| ** It is not an error if no slot can be populated because the write-lock |
| ** cannot be obtained. If any other error occurs, return an LSM error code. |
| ** Otherwise, LSM_OK. |
| ** |
| ** This function is called at various points to try to ensure that there |
| ** always exists at least one read-lock slot that can be used by a read-only |
| ** client. And so that, in the usual case, there is an "exact match" available |
| ** whenever a read transaction is opened by any client. At present this |
| ** function is called when: |
| ** |
| ** * A write transaction that called lsmTreeDiscardOld() is committed, and |
| ** * Whenever the working snapshot is updated (i.e. lsmFinishWork()). |
| */ |
| static int dbSetReadLock(lsm_db *db, i64 iLsm, u32 iShm){ |
| int rc = LSM_OK; |
| ShmHeader *pShm = db->pShmhdr; |
| int i; |
| |
| /* Check if there is already a slot containing the required values. */ |
| for(i=0; i<LSM_LOCK_NREADER; i++){ |
| ShmReader *p = &pShm->aReader[i]; |
| if( p->iLsmId==iLsm && p->iTreeId==iShm ) return LSM_OK; |
| } |
| |
| /* Iterate through all read-lock slots, attempting to take a write-lock |
| ** on each of them. If a write-lock succeeds, populate the locked slot |
| ** with the required values and break out of the loop. */ |
| for(i=0; rc==LSM_OK && i<LSM_LOCK_NREADER; i++){ |
| rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0); |
| if( rc==LSM_BUSY ){ |
| rc = LSM_OK; |
| }else{ |
| ShmReader *p = &pShm->aReader[i]; |
| p->iLsmId = iLsm; |
| p->iTreeId = iShm; |
| lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0); |
| break; |
| } |
| } |
| |
| return rc; |
| } |
| |
| /* |
| ** Release the read-lock currently held by connection db. |
| */ |
| int dbReleaseReadlock(lsm_db *db){ |
| int rc = LSM_OK; |
| if( db->iReader>=0 ){ |
| rc = lsmShmLock(db, LSM_LOCK_READER(db->iReader), LSM_LOCK_UNLOCK, 0); |
| db->iReader = -1; |
| } |
| db->bRoTrans = 0; |
| return rc; |
| } |
| |
| |
| /* |
| ** Argument bFlush is true if the contents of the in-memory tree has just |
| ** been flushed to disk. The significance of this is that once the snapshot |
| ** created to hold the updated state of the database is synced to disk, log |
| ** file space can be recycled. |
| */ |
| void lsmFinishWork(lsm_db *pDb, int bFlush, int *pRc){ |
| int rc = *pRc; |
| assert( rc!=0 || pDb->pWorker ); |
| if( pDb->pWorker ){ |
| /* If no error has occurred, serialize the worker snapshot and write |
| ** it to shared memory. */ |
| if( rc==LSM_OK ){ |
| rc = lsmSaveWorker(pDb, bFlush); |
| } |
| |
| /* Assuming no error has occurred, update a read lock slot with the |
| ** new snapshot id (see comments above function dbSetReadLock()). */ |
| if( rc==LSM_OK ){ |
| if( pDb->iReader<0 ){ |
| rc = lsmTreeLoadHeader(pDb, 0); |
| } |
| if( rc==LSM_OK ){ |
| rc = dbSetReadLock(pDb, pDb->pWorker->iId, pDb->treehdr.iUsedShmid); |
| } |
| } |
| |
| /* Free the snapshot object. */ |
| lsmFreeSnapshot(pDb->pEnv, pDb->pWorker); |
| pDb->pWorker = 0; |
| } |
| |
| lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK, 0); |
| *pRc = rc; |
| } |
| |
| /* |
| ** Called when recovery is finished. |
| */ |
| int lsmFinishRecovery(lsm_db *pDb){ |
| lsmTreeEndTransaction(pDb, 1); |
| return LSM_OK; |
| } |
| |
| /* |
| ** Check if the currently configured compression functions |
| ** (LSM_CONFIG_SET_COMPRESSION) are compatible with a database that has its |
| ** compression id set to iReq. Compression routines are compatible if iReq |
| ** is zero (indicating the database is empty), or if it is equal to the |
| ** compression id of the configured compression routines. |
| ** |
| ** If the check shows that the current compression are incompatible and there |
| ** is a compression factory registered, give it a chance to install new |
| ** compression routines. |
| ** |
| ** If, after any registered factory is invoked, the compression functions |
| ** are still incompatible, return LSM_MISMATCH. Otherwise, LSM_OK. |
| */ |
| int lsmCheckCompressionId(lsm_db *pDb, u32 iReq){ |
| if( iReq!=LSM_COMPRESSION_EMPTY && pDb->compress.iId!=iReq ){ |
| if( pDb->factory.xFactory ){ |
| pDb->bInFactory = 1; |
| pDb->factory.xFactory(pDb->factory.pCtx, pDb, iReq); |
| pDb->bInFactory = 0; |
| } |
| if( pDb->compress.iId!=iReq ){ |
| /* Incompatible */ |
| return LSM_MISMATCH; |
| } |
| } |
| /* Compatible */ |
| return LSM_OK; |
| } |
| |
| /* |
| ** Begin a read transaction. This function is a no-op if the connection |
| ** passed as the only argument already has an open read transaction. |
| */ |
| int lsmBeginReadTrans(lsm_db *pDb){ |
| const int MAX_READLOCK_ATTEMPTS = 10; |
| const int nMaxAttempt = (pDb->bRoTrans ? 1 : MAX_READLOCK_ATTEMPTS); |
| |
| int rc = LSM_OK; /* Return code */ |
| int iAttempt = 0; |
| |
| assert( pDb->pWorker==0 ); |
| |
| while( rc==LSM_OK && pDb->iReader<0 && (iAttempt++)<nMaxAttempt ){ |
| int iTreehdr = 0; |
| int iSnap = 0; |
| assert( pDb->pCsr==0 && pDb->nTransOpen==0 ); |
| |
| /* Load the in-memory tree header. */ |
| rc = lsmTreeLoadHeader(pDb, &iTreehdr); |
| |
| /* Load the database snapshot */ |
| if( rc==LSM_OK ){ |
| if( lsmCheckpointClientCacheOk(pDb)==0 ){ |
| lsmFreeSnapshot(pDb->pEnv, pDb->pClient); |
| pDb->pClient = 0; |
| lsmMCursorFreeCache(pDb); |
| lsmFsPurgeCache(pDb->pFS); |
| rc = lsmCheckpointLoad(pDb, &iSnap); |
| }else{ |
| iSnap = 1; |
| } |
| } |
| |
| /* Take a read-lock on the tree and snapshot just loaded. Then check |
| ** that the shared-memory still contains the same values. If so, proceed. |
| ** Otherwise, relinquish the read-lock and retry the whole procedure |
| ** (starting with loading the in-memory tree header). */ |
| if( rc==LSM_OK ){ |
| u32 iShmMax = pDb->treehdr.iUsedShmid; |
| u32 iShmMin = pDb->treehdr.iNextShmid+1-LSM_MAX_SHMCHUNKS; |
| rc = lsmReadlock( |
| pDb, lsmCheckpointId(pDb->aSnapshot, 0), iShmMin, iShmMax |
| ); |
| if( rc==LSM_OK ){ |
| if( lsmTreeLoadHeaderOk(pDb, iTreehdr) |
| && lsmCheckpointLoadOk(pDb, iSnap) |
| ){ |
| /* Read lock has been successfully obtained. Deserialize the |
| ** checkpoint just loaded. TODO: This will be removed after |
| ** lsm_sorted.c is changed to work directly from the serialized |
| ** version of the snapshot. */ |
| if( pDb->pClient==0 ){ |
| rc = lsmCheckpointDeserialize(pDb, 0, pDb->aSnapshot,&pDb->pClient); |
| } |
| assert( (rc==LSM_OK)==(pDb->pClient!=0) ); |
| assert( pDb->iReader>=0 ); |
| |
| /* Check that the client has the right compression hooks loaded. |
| ** If not, set rc to LSM_MISMATCH. */ |
| if( rc==LSM_OK ){ |
| rc = lsmCheckCompressionId(pDb, pDb->pClient->iCmpId); |
| } |
| }else{ |
| rc = dbReleaseReadlock(pDb); |
| } |
| } |
| |
| if( rc==LSM_BUSY ){ |
| rc = LSM_OK; |
| } |
| } |
| #if 0 |
| if( rc==LSM_OK && pDb->pClient ){ |
| fprintf(stderr, |
| "reading %p: snapshot:%d used-shmid:%d trans-id:%d iOldShmid=%d\n", |
| (void *)pDb, |
| (int)pDb->pClient->iId, (int)pDb->treehdr.iUsedShmid, |
| (int)pDb->treehdr.root.iTransId, |
| (int)pDb->treehdr.iOldShmid |
| ); |
| } |
| #endif |
| } |
| |
| if( rc==LSM_OK ){ |
| rc = lsmShmCacheChunks(pDb, pDb->treehdr.nChunk); |
| } |
| if( rc!=LSM_OK ){ |
| dbReleaseReadlock(pDb); |
| } |
| if( pDb->pClient==0 && rc==LSM_OK ) rc = LSM_BUSY; |
| return rc; |
| } |
| |
| /* |
| ** This function is used by a read-write connection to determine if there |
| ** are currently one or more read-only transactions open on the database |
| ** (in this context a read-only transaction is one opened by a read-only |
| ** connection on a non-live database). |
| ** |
| ** If no error occurs, LSM_OK is returned and *pbExists is set to true if |
| ** some other connection has a read-only transaction open, or false |
| ** otherwise. If an error occurs an LSM error code is returned and the final |
| ** value of *pbExist is undefined. |
| */ |
| int lsmDetectRoTrans(lsm_db *db, int *pbExist){ |
| int rc; |
| |
| /* Only a read-write connection may use this function. */ |
| assert( db->bReadonly==0 ); |
| |
| rc = lsmShmTestLock(db, LSM_LOCK_ROTRANS, 1, LSM_LOCK_EXCL); |
| if( rc==LSM_BUSY ){ |
| *pbExist = 1; |
| rc = LSM_OK; |
| }else{ |
| *pbExist = 0; |
| } |
| |
| return rc; |
| } |
| |
| /* |
| ** db is a read-only database handle in the disconnected state. This function |
| ** attempts to open a read-transaction on the database. This may involve |
| ** connecting to the database system (opening shared memory etc.). |
| */ |
| int lsmBeginRoTrans(lsm_db *db){ |
| int rc = LSM_OK; |
| |
| assert( db->bReadonly && db->pShmhdr==0 ); |
| assert( db->iReader<0 ); |
| |
| if( db->bRoTrans==0 ){ |
| |
| /* Attempt a shared-lock on DMS1. */ |
| rc = lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_SHARED, 0); |
| if( rc!=LSM_OK ) return rc; |
| |
| rc = lsmShmTestLock( |
| db, LSM_LOCK_RWCLIENT(0), LSM_LOCK_NREADER, LSM_LOCK_SHARED |
| ); |
| if( rc==LSM_OK ){ |
| /* System is not live. Take a SHARED lock on the ROTRANS byte and |
| ** release DMS1. Locking ROTRANS tells all read-write clients that they |
| ** may not recycle any disk space from within the database or log files, |
| ** as a read-only client may be using it. */ |
| rc = lsmShmLock(db, LSM_LOCK_ROTRANS, LSM_LOCK_SHARED, 0); |
| lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0); |
| |
| if( rc==LSM_OK ){ |
| db->bRoTrans = 1; |
| rc = lsmShmCacheChunks(db, 1); |
| if( rc==LSM_OK ){ |
| db->pShmhdr = (ShmHeader *)db->apShm[0]; |
| memset(db->pShmhdr, 0, sizeof(ShmHeader)); |
| rc = lsmCheckpointRecover(db); |
| if( rc==LSM_OK ){ |
| rc = lsmLogRecover(db); |
| } |
| } |
| } |
| }else if( rc==LSM_BUSY ){ |
| /* System is live! */ |
| rc = lsmShmLock(db, LSM_LOCK_DMS3, LSM_LOCK_SHARED, 0); |
| lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0); |
| if( rc==LSM_OK ){ |
| rc = lsmShmCacheChunks(db, 1); |
| if( rc==LSM_OK ){ |
| db->pShmhdr = (ShmHeader *)db->apShm[0]; |
| } |
| } |
| } |
| |
| if( rc==LSM_OK ){ |
| rc = lsmBeginReadTrans(db); |
| } |
| } |
| |
| return rc; |
| } |
| |
| /* |
| ** Close the currently open read transaction. |
| */ |
| void lsmFinishReadTrans(lsm_db *pDb){ |
| |
| /* Worker connections should not be closing read transactions. And |
| ** read transactions should only be closed after all cursors and write |
| ** transactions have been closed. Finally pClient should be non-NULL |
| ** only iff pDb->iReader>=0. */ |
| assert( pDb->pWorker==0 ); |
| assert( pDb->pCsr==0 && pDb->nTransOpen==0 ); |
| |
| if( pDb->bRoTrans ){ |
| int i; |
| for(i=0; i<pDb->nShm; i++){ |
| lsmFree(pDb->pEnv, pDb->apShm[i]); |
| } |
| lsmFree(pDb->pEnv, pDb->apShm); |
| pDb->apShm = 0; |
| pDb->nShm = 0; |
| pDb->pShmhdr = 0; |
| |
| lsmShmLock(pDb, LSM_LOCK_ROTRANS, LSM_LOCK_UNLOCK, 0); |
| } |
| dbReleaseReadlock(pDb); |
| } |
| |
| /* |
| ** Open a write transaction. |
| */ |
| int lsmBeginWriteTrans(lsm_db *pDb){ |
| int rc = LSM_OK; /* Return code */ |
| ShmHeader *pShm = pDb->pShmhdr; /* Shared memory header */ |
| |
| assert( pDb->nTransOpen==0 ); |
| assert( pDb->bDiscardOld==0 ); |
| assert( pDb->bReadonly==0 ); |
| |
| /* If there is no read-transaction open, open one now. */ |
| if( pDb->iReader<0 ){ |
| rc = lsmBeginReadTrans(pDb); |
| } |
| |
| /* Attempt to take the WRITER lock */ |
| if( rc==LSM_OK ){ |
| rc = lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_EXCL, 0); |
| } |
| |
| /* If the previous writer failed mid-transaction, run emergency rollback. */ |
| if( rc==LSM_OK && pShm->bWriter ){ |
| rc = lsmTreeRepair(pDb); |
| if( rc==LSM_OK ) pShm->bWriter = 0; |
| } |
| |
| /* Check that this connection is currently reading from the most recent |
| ** version of the database. If not, return LSM_BUSY. */ |
| if( rc==LSM_OK && memcmp(&pShm->hdr1, &pDb->treehdr, sizeof(TreeHeader)) ){ |
| rc = LSM_BUSY; |
| } |
| |
| if( rc==LSM_OK ){ |
| rc = lsmLogBegin(pDb); |
| } |
| |
| /* If everything was successful, set the "transaction-in-progress" flag |
| ** and return LSM_OK. Otherwise, if some error occurred, relinquish the |
| ** WRITER lock and return an error code. */ |
| if( rc==LSM_OK ){ |
| TreeHeader *p = &pDb->treehdr; |
| pShm->bWriter = 1; |
| p->root.iTransId++; |
| if( lsmTreeHasOld(pDb) && p->iOldLog==pDb->pClient->iLogOff ){ |
| lsmTreeDiscardOld(pDb); |
| pDb->bDiscardOld = 1; |
| } |
| }else{ |
| lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_UNLOCK, 0); |
| if( pDb->pCsr==0 ) lsmFinishReadTrans(pDb); |
| } |
| return rc; |
| } |
| |
| /* |
| ** End the current write transaction. The connection is left with an open |
| ** read transaction. It is an error to call this if there is no open write |
| ** transaction. |
| ** |
| ** If the transaction was committed, then a commit record has already been |
| ** written into the log file when this function is called. Or, if the |
| ** transaction was rolled back, both the log file and in-memory tree |
| ** structure have already been restored. In either case, this function |
| ** merely releases locks and other resources held by the write-transaction. |
| ** |
| ** LSM_OK is returned if successful, or an LSM error code otherwise. |
| */ |
| int lsmFinishWriteTrans(lsm_db *pDb, int bCommit){ |
| int rc = LSM_OK; |
| int bFlush = 0; |
| |
| lsmLogEnd(pDb, bCommit); |
| if( rc==LSM_OK && bCommit && lsmTreeSize(pDb)>pDb->nTreeLimit ){ |
| bFlush = 1; |
| lsmTreeMakeOld(pDb); |
| } |
| lsmTreeEndTransaction(pDb, bCommit); |
| |
| if( rc==LSM_OK ){ |
| if( bFlush && pDb->bAutowork ){ |
| rc = lsmSortedAutoWork(pDb, 1); |
| }else if( bCommit && pDb->bDiscardOld ){ |
| rc = dbSetReadLock(pDb, pDb->pClient->iId, pDb->treehdr.iUsedShmid); |
| } |
| } |
| pDb->bDiscardOld = 0; |
| lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_UNLOCK, 0); |
| |
| if( bFlush && pDb->bAutowork==0 && pDb->xWork ){ |
| pDb->xWork(pDb, pDb->pWorkCtx); |
| } |
| return rc; |
| } |
| |
| |
| /* |
| ** Return non-zero if the caller is holding the client mutex. |
| */ |
| #ifdef LSM_DEBUG |
| int lsmHoldingClientMutex(lsm_db *pDb){ |
| return lsmMutexHeld(pDb->pEnv, pDb->pDatabase->pClientMutex); |
| } |
| #endif |
| |
| static int slotIsUsable(ShmReader *p, i64 iLsm, u32 iShmMin, u32 iShmMax){ |
| return( |
| p->iLsmId && p->iLsmId<=iLsm |
| && shm_sequence_ge(iShmMax, p->iTreeId) |
| && shm_sequence_ge(p->iTreeId, iShmMin) |
| ); |
| } |
| |
| /* |
| ** Obtain a read-lock on database version identified by the combination |
| ** of snapshot iLsm and tree iTree. Return LSM_OK if successful, or |
| ** an LSM error code otherwise. |
| */ |
| int lsmReadlock(lsm_db *db, i64 iLsm, u32 iShmMin, u32 iShmMax){ |
| int rc = LSM_OK; |
| ShmHeader *pShm = db->pShmhdr; |
| int i; |
| |
| assert( db->iReader<0 ); |
| assert( shm_sequence_ge(iShmMax, iShmMin) ); |
| |
| /* This is a no-op if the read-only transaction flag is set. */ |
| if( db->bRoTrans ){ |
| db->iReader = 0; |
| return LSM_OK; |
| } |
| |
| /* Search for an exact match. */ |
| for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){ |
| ShmReader *p = &pShm->aReader[i]; |
| if( p->iLsmId==iLsm && p->iTreeId==iShmMax ){ |
| rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0); |
| if( rc==LSM_OK && p->iLsmId==iLsm && p->iTreeId==iShmMax ){ |
| db->iReader = i; |
| }else if( rc==LSM_BUSY ){ |
| rc = LSM_OK; |
| } |
| } |
| } |
| |
| /* Try to obtain a write-lock on each slot, in order. If successful, set |
| ** the slot values to iLsm/iTree. */ |
| for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){ |
| rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0); |
| if( rc==LSM_BUSY ){ |
| rc = LSM_OK; |
| }else{ |
| ShmReader *p = &pShm->aReader[i]; |
| p->iLsmId = iLsm; |
| p->iTreeId = iShmMax; |
| rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0); |
| assert( rc!=LSM_BUSY ); |
| if( rc==LSM_OK ) db->iReader = i; |
| } |
| } |
| |
| /* Search for any usable slot */ |
| for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){ |
| ShmReader *p = &pShm->aReader[i]; |
| if( slotIsUsable(p, iLsm, iShmMin, iShmMax) ){ |
| rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0); |
| if( rc==LSM_OK && slotIsUsable(p, iLsm, iShmMin, iShmMax) ){ |
| db->iReader = i; |
| }else if( rc==LSM_BUSY ){ |
| rc = LSM_OK; |
| } |
| } |
| } |
| |
| if( rc==LSM_OK && db->iReader<0 ){ |
| rc = LSM_BUSY; |
| } |
| return rc; |
| } |
| |
| /* |
| ** This is used to check if there exists a read-lock locking a particular |
| ** version of either the in-memory tree or database file. |
| ** |
| ** If iLsmId is non-zero, then it is a snapshot id. If there exists a |
| ** read-lock using this snapshot or newer, set *pbInUse to true. Or, |
| ** if there is no such read-lock, set it to false. |
| ** |
| ** Or, if iLsmId is zero, then iShmid is a shared-memory sequence id. |
| ** Search for a read-lock using this sequence id or newer. etc. |
| */ |
| static int isInUse(lsm_db *db, i64 iLsmId, u32 iShmid, int *pbInUse){ |
| ShmHeader *pShm = db->pShmhdr; |
| int i; |
| int rc = LSM_OK; |
| |
| for(i=0; rc==LSM_OK && i<LSM_LOCK_NREADER; i++){ |
| ShmReader *p = &pShm->aReader[i]; |
| if( p->iLsmId ){ |
| if( (iLsmId!=0 && p->iLsmId!=0 && iLsmId>=p->iLsmId) |
| || (iLsmId==0 && shm_sequence_ge(p->iTreeId, iShmid)) |
| ){ |
| rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0); |
| if( rc==LSM_OK ){ |
| p->iLsmId = 0; |
| lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0); |
| } |
| } |
| } |
| } |
| |
| if( rc==LSM_BUSY ){ |
| *pbInUse = 1; |
| return LSM_OK; |
| } |
| *pbInUse = 0; |
| return rc; |
| } |
| |
| /* |
| ** This function is called by worker connections to determine the smallest |
| ** snapshot id that is currently in use by a database client. The worker |
| ** connection uses this result to determine whether or not it is safe to |
| ** recycle a database block. |
| */ |
| static int firstSnapshotInUse( |
| lsm_db *db, /* Database handle */ |
| i64 *piInUse /* IN/OUT: Smallest snapshot id in use */ |
| ){ |
| ShmHeader *pShm = db->pShmhdr; |
| i64 iInUse = *piInUse; |
| int i; |
| |
| assert( iInUse>0 ); |
| for(i=0; i<LSM_LOCK_NREADER; i++){ |
| ShmReader *p = &pShm->aReader[i]; |
| if( p->iLsmId ){ |
| i64 iThis = p->iLsmId; |
| if( iThis!=0 && iInUse>iThis ){ |
| int rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0); |
| if( rc==LSM_OK ){ |
| p->iLsmId = 0; |
| lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0); |
| }else if( rc==LSM_BUSY ){ |
| iInUse = iThis; |
| }else{ |
| /* Some error other than LSM_BUSY. Return the error code to |
| ** the caller in this case. */ |
| return rc; |
| } |
| } |
| } |
| } |
| |
| *piInUse = iInUse; |
| return LSM_OK; |
| } |
| |
| int lsmTreeInUse(lsm_db *db, u32 iShmid, int *pbInUse){ |
| if( db->treehdr.iUsedShmid==iShmid ){ |
| *pbInUse = 1; |
| return LSM_OK; |
| } |
| return isInUse(db, 0, iShmid, pbInUse); |
| } |
| |
| int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse){ |
| if( db->pClient && db->pClient->iId<=iLsmId ){ |
| *pbInUse = 1; |
| return LSM_OK; |
| } |
| return isInUse(db, iLsmId, 0, pbInUse); |
| } |
| |
| /* |
| ** This function may only be called after a successful call to |
| ** lsmDbDatabaseConnect(). It returns true if the connection is in |
| ** multi-process mode, or false otherwise. |
| */ |
| int lsmDbMultiProc(lsm_db *pDb){ |
| return pDb->pDatabase && pDb->pDatabase->bMultiProc; |
| } |
| |
| |
| /************************************************************************* |
| ************************************************************************** |
| ************************************************************************** |
| ************************************************************************** |
| ************************************************************************** |
| *************************************************************************/ |
| |
| /* |
| ** Ensure that database connection db has cached pointers to at least the |
| ** first nChunk chunks of shared memory. |
| */ |
| int lsmShmCacheChunks(lsm_db *db, int nChunk){ |
| int rc = LSM_OK; |
| if( nChunk>db->nShm ){ |
| static const int NINCR = 16; |
| Database *p = db->pDatabase; |
| lsm_env *pEnv = db->pEnv; |
| int nAlloc; |
| int i; |
| |
| /* Ensure that the db->apShm[] array is large enough. If an attempt to |
| ** allocate memory fails, return LSM_NOMEM immediately. The apShm[] array |
| ** is always extended in multiples of 16 entries - so the actual allocated |
| ** size can be inferred from nShm. */ |
| nAlloc = ((db->nShm + NINCR - 1) / NINCR) * NINCR; |
| while( nChunk>=nAlloc ){ |
| void **apShm; |
| nAlloc += NINCR; |
| apShm = lsmRealloc(pEnv, db->apShm, sizeof(void*)*nAlloc); |
| if( !apShm ) return LSM_NOMEM_BKPT; |
| db->apShm = apShm; |
| } |
| |
| if( db->bRoTrans ){ |
| for(i=db->nShm; rc==LSM_OK && i<nChunk; i++){ |
| db->apShm[i] = lsmMallocZeroRc(pEnv, LSM_SHM_CHUNK_SIZE, &rc); |
| db->nShm++; |
| } |
| |
| }else{ |
| |
| /* Enter the client mutex */ |
| lsmMutexEnter(pEnv, p->pClientMutex); |
| |
| /* Extend the Database objects apShmChunk[] array if necessary. Using the |
| ** same pattern as for the lsm_db.apShm[] array above. */ |
| nAlloc = ((p->nShmChunk + NINCR - 1) / NINCR) * NINCR; |
| while( nChunk>=nAlloc ){ |
| void **apShm; |
| nAlloc += NINCR; |
| apShm = lsmRealloc(pEnv, p->apShmChunk, sizeof(void*)*nAlloc); |
| if( !apShm ){ |
| rc = LSM_NOMEM_BKPT; |
| break; |
| } |
| p->apShmChunk = apShm; |
| } |
| |
| for(i=db->nShm; rc==LSM_OK && i<nChunk; i++){ |
| if( i>=p->nShmChunk ){ |
| void *pChunk = 0; |
| if( p->bMultiProc==0 ){ |
| /* Single process mode */ |
| pChunk = lsmMallocZeroRc(pEnv, LSM_SHM_CHUNK_SIZE, &rc); |
| }else{ |
| /* Multi-process mode */ |
| rc = lsmEnvShmMap(pEnv, p->pFile, i, LSM_SHM_CHUNK_SIZE, &pChunk); |
| } |
| if( rc==LSM_OK ){ |
| p->apShmChunk[i] = pChunk; |
| p->nShmChunk++; |
| } |
| } |
| if( rc==LSM_OK ){ |
| db->apShm[i] = p->apShmChunk[i]; |
| db->nShm++; |
| } |
| } |
| |
| /* Release the client mutex */ |
| lsmMutexLeave(pEnv, p->pClientMutex); |
| } |
| } |
| |
| return rc; |
| } |
| |
| static int lockSharedFile(lsm_env *pEnv, Database *p, int iLock, int eOp){ |
| int rc = LSM_OK; |
| if( p->bMultiProc ){ |
| rc = lsmEnvLock(pEnv, p->pFile, iLock, eOp); |
| } |
| return rc; |
| } |
| |
| /* |
| ** Test if it would be possible for connection db to obtain a lock of type |
| ** eType on the nLock locks starting at iLock. If so, return LSM_OK. If it |
| ** would not be possible to obtain the lock due to a lock held by another |
| ** connection, return LSM_BUSY. If an IO or other error occurs (i.e. in the |
| ** lsm_env.xTestLock function), return some other LSM error code. |
| ** |
| ** Note that this function never actually locks the database - it merely |
| ** queries the system to see if there exists a lock that would prevent |
| ** it from doing so. |
| */ |
| int lsmShmTestLock( |
| lsm_db *db, |
| int iLock, |
| int nLock, |
| int eOp |
| ){ |
| int rc = LSM_OK; |
| lsm_db *pIter; |
| Database *p = db->pDatabase; |
| int i; |
| u64 mask = 0; |
| |
| for(i=iLock; i<(iLock+nLock); i++){ |
| mask |= ((u64)1 << (iLock-1)); |
| if( eOp==LSM_LOCK_EXCL ) mask |= ((u64)1 << (iLock+32-1)); |
| } |
| |
| lsmMutexEnter(db->pEnv, p->pClientMutex); |
| for(pIter=p->pConn; pIter; pIter=pIter->pNext){ |
| if( pIter!=db && (pIter->mLock & mask) ){ |
| assert( pIter!=db ); |
| break; |
| } |
| } |
| |
| if( pIter ){ |
| rc = LSM_BUSY; |
| }else if( p->bMultiProc ){ |
| rc = lsmEnvTestLock(db->pEnv, p->pFile, iLock, nLock, eOp); |
| } |
| |
| lsmMutexLeave(db->pEnv, p->pClientMutex); |
| return rc; |
| } |
| |
| /* |
| ** Attempt to obtain the lock identified by the iLock and bExcl parameters. |
| ** If successful, return LSM_OK. If the lock cannot be obtained because |
| ** there exists some other conflicting lock, return LSM_BUSY. If some other |
| ** error occurs, return an LSM error code. |
| ** |
| ** Parameter iLock must be one of LSM_LOCK_WRITER, WORKER or CHECKPOINTER, |
| ** or else a value returned by the LSM_LOCK_READER macro. |
| */ |
| int lsmShmLock( |
| lsm_db *db, |
| int iLock, |
| int eOp, /* One of LSM_LOCK_UNLOCK, SHARED or EXCL */ |
| int bBlock /* True for a blocking lock */ |
| ){ |
| lsm_db *pIter; |
| const u64 me = ((u64)1 << (iLock-1)); |
| const u64 ms = ((u64)1 << (iLock+32-1)); |
| int rc = LSM_OK; |
| Database *p = db->pDatabase; |
| |
| assert( eOp!=LSM_LOCK_EXCL || p->bReadonly==0 ); |
| assert( iLock>=1 && iLock<=LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1) ); |
| assert( LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1)<=32 ); |
| assert( eOp==LSM_LOCK_UNLOCK || eOp==LSM_LOCK_SHARED || eOp==LSM_LOCK_EXCL ); |
| |
| /* Check for a no-op. Proceed only if this is not one of those. */ |
| if( (eOp==LSM_LOCK_UNLOCK && (db->mLock & (me|ms))!=0) |
| || (eOp==LSM_LOCK_SHARED && (db->mLock & (me|ms))!=ms) |
| || (eOp==LSM_LOCK_EXCL && (db->mLock & me)==0) |
| ){ |
| int nExcl = 0; /* Number of connections holding EXCLUSIVE */ |
| int nShared = 0; /* Number of connections holding SHARED */ |
| lsmMutexEnter(db->pEnv, p->pClientMutex); |
| |
| /* Figure out the locks currently held by this process on iLock, not |
| ** including any held by connection db. */ |
| for(pIter=p->pConn; pIter; pIter=pIter->pNext){ |
| assert( (pIter->mLock & me)==0 || (pIter->mLock & ms)!=0 ); |
| if( pIter!=db ){ |
| if( pIter->mLock & me ){ |
| nExcl++; |
| }else if( pIter->mLock & ms ){ |
| nShared++; |
| } |
| } |
| } |
| assert( nExcl==0 || nExcl==1 ); |
| assert( nExcl==0 || nShared==0 ); |
| assert( nExcl==0 || (db->mLock & (me|ms))==0 ); |
| |
| switch( eOp ){ |
| case LSM_LOCK_UNLOCK: |
| if( nShared==0 ){ |
| lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_UNLOCK); |
| } |
| db->mLock &= ~(me|ms); |
| break; |
| |
| case LSM_LOCK_SHARED: |
| if( nExcl ){ |
| rc = LSM_BUSY; |
| }else{ |
| if( nShared==0 ){ |
| rc = lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_SHARED); |
| } |
| if( rc==LSM_OK ){ |
| db->mLock |= ms; |
| db->mLock &= ~me; |
| } |
| } |
| break; |
| |
| default: |
| assert( eOp==LSM_LOCK_EXCL ); |
| if( nExcl || nShared ){ |
| rc = LSM_BUSY; |
| }else{ |
| rc = lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_EXCL); |
| if( rc==LSM_OK ){ |
| db->mLock |= (me|ms); |
| } |
| } |
| break; |
| } |
| |
| lsmMutexLeave(db->pEnv, p->pClientMutex); |
| } |
| |
| return rc; |
| } |
| |
| #ifdef LSM_DEBUG |
| |
| int shmLockType(lsm_db *db, int iLock){ |
| const u64 me = ((u64)1 << (iLock-1)); |
| const u64 ms = ((u64)1 << (iLock+32-1)); |
| |
| if( db->mLock & me ) return LSM_LOCK_EXCL; |
| if( db->mLock & ms ) return LSM_LOCK_SHARED; |
| return LSM_LOCK_UNLOCK; |
| } |
| |
| /* |
| ** The arguments passed to this function are similar to those passed to |
| ** the lsmShmLock() function. However, instead of obtaining a new lock |
| ** this function returns true if the specified connection already holds |
| ** (or does not hold) such a lock, depending on the value of eOp. As |
| ** follows: |
| ** |
| ** (eOp==LSM_LOCK_UNLOCK) -> true if db has no lock on iLock |
| ** (eOp==LSM_LOCK_SHARED) -> true if db has at least a SHARED lock on iLock. |
| ** (eOp==LSM_LOCK_EXCL) -> true if db has an EXCLUSIVE lock on iLock. |
| */ |
| int lsmShmAssertLock(lsm_db *db, int iLock, int eOp){ |
| int ret = 0; |
| int eHave; |
| |
| assert( iLock>=1 && iLock<=LSM_LOCK_READER(LSM_LOCK_NREADER-1) ); |
| assert( iLock<=16 ); |
| assert( eOp==LSM_LOCK_UNLOCK || eOp==LSM_LOCK_SHARED || eOp==LSM_LOCK_EXCL ); |
| |
| eHave = shmLockType(db, iLock); |
| |
| switch( eOp ){ |
| case LSM_LOCK_UNLOCK: |
| ret = (eHave==LSM_LOCK_UNLOCK); |
| break; |
| case LSM_LOCK_SHARED: |
| ret = (eHave!=LSM_LOCK_UNLOCK); |
| break; |
| case LSM_LOCK_EXCL: |
| ret = (eHave==LSM_LOCK_EXCL); |
| break; |
| default: |
| assert( !"bad eOp value passed to lsmShmAssertLock()" ); |
| break; |
| } |
| |
| return ret; |
| } |
| |
| int lsmShmAssertWorker(lsm_db *db){ |
| return lsmShmAssertLock(db, LSM_LOCK_WORKER, LSM_LOCK_EXCL) && db->pWorker; |
| } |
| |
| /* |
| ** This function does not contribute to library functionality, and is not |
| ** included in release builds. It is intended to be called from within |
| ** an interactive debugger. |
| ** |
| ** When called, this function prints a single line of human readable output |
| ** to stdout describing the locks currently held by the connection. For |
| ** example: |
| ** |
| ** (gdb) call print_db_locks(pDb) |
| ** (shared on dms2) (exclusive on writer) |
| */ |
| void print_db_locks(lsm_db *db){ |
| int iLock; |
| for(iLock=0; iLock<16; iLock++){ |
| int bOne = 0; |
| const char *azLock[] = {0, "shared", "exclusive"}; |
| const char *azName[] = { |
| 0, "dms1", "dms2", "writer", "worker", "checkpointer", |
| "reader0", "reader1", "reader2", "reader3", "reader4", "reader5" |
| }; |
| int eHave = shmLockType(db, iLock); |
| if( azLock[eHave] ){ |
| printf("%s(%s on %s)", (bOne?" ":""), azLock[eHave], azName[iLock]); |
| bOne = 1; |
| } |
| } |
| printf("\n"); |
| } |
| void print_all_db_locks(lsm_db *db){ |
| lsm_db *p; |
| for(p=db->pDatabase->pConn; p; p=p->pNext){ |
| printf("%s connection %p ", ((p==db)?"*":""), p); |
| print_db_locks(p); |
| } |
| } |
| #endif |
| |
| void lsmShmBarrier(lsm_db *db){ |
| lsmEnvShmBarrier(db->pEnv); |
| } |
| |
| int lsm_checkpoint(lsm_db *pDb, int *pnKB){ |
| int rc; /* Return code */ |
| u32 nWrite = 0; /* Number of pages checkpointed */ |
| |
| /* Attempt the checkpoint. If successful, nWrite is set to the number of |
| ** pages written between this and the previous checkpoint. */ |
| rc = lsmCheckpointWrite(pDb, &nWrite); |
| |
| /* If required, calculate the output variable (KB of data checkpointed). |
| ** Set it to zero if an error occured. */ |
| if( pnKB ){ |
| int nKB = 0; |
| if( rc==LSM_OK && nWrite ){ |
| nKB = (((i64)nWrite * lsmFsPageSize(pDb->pFS)) + 1023) / 1024; |
| } |
| *pnKB = nKB; |
| } |
| |
| return rc; |
| } |