blob: 7ff91e667b6baa90d48c7709a299b9c4801a92c0 [file] [log] [blame]
/*
** 2004 April 6
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements an external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"
/*
** The header string that appears at the beginning of every
** SQLite database.
*/
static const char zMagicHeader[] = SQLITE_FILE_HEADER;
/*
** Set this global variable to 1 to enable tracing using the TRACE
** macro.
*/
#if 0
int sqlite3BtreeTrace=1; /* True to enable tracing */
# define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);}
#else
# define TRACE(X)
#endif
/*
** Extract a 2-byte big-endian integer from an array of unsigned bytes.
** But if the value is zero, make it 65536.
**
** This routine is used to extract the "offset to cell content area" value
** from the header of a btree page. If the page size is 65536 and the page
** is empty, the offset should be 65536, but the 2-byte value stores zero.
** This routine makes the necessary adjustment to 65536.
*/
#define get2byteNotZero(X) (((((int)get2byte(X))-1)&0xffff)+1)
/*
** Values passed as the 5th argument to allocateBtreePage()
*/
#define BTALLOC_ANY 0 /* Allocate any page */
#define BTALLOC_EXACT 1 /* Allocate exact page if possible */
#define BTALLOC_LE 2 /* Allocate any page <= the parameter */
/*
** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not
** defined, or 0 if it is. For example:
**
** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum);
*/
#ifndef SQLITE_OMIT_AUTOVACUUM
#define IfNotOmitAV(expr) (expr)
#else
#define IfNotOmitAV(expr) 0
#endif
#ifndef SQLITE_OMIT_SHARED_CACHE
/*
** A list of BtShared objects that are eligible for participation
** in shared cache. This variable has file scope during normal builds,
** but the test harness needs to access it so we make it global for
** test builds.
**
** Access to this variable is protected by SQLITE_MUTEX_STATIC_MASTER.
*/
#ifdef SQLITE_TEST
BtShared *SQLITE_WSD sqlite3SharedCacheList = 0;
#else
static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0;
#endif
#endif /* SQLITE_OMIT_SHARED_CACHE */
#ifndef SQLITE_OMIT_SHARED_CACHE
/*
** Enable or disable the shared pager and schema features.
**
** This routine has no effect on existing database connections.
** The shared cache setting effects only future calls to
** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
*/
int sqlite3_enable_shared_cache(int enable){
sqlite3GlobalConfig.sharedCacheEnabled = enable;
return SQLITE_OK;
}
#endif
#ifdef SQLITE_OMIT_SHARED_CACHE
/*
** The functions querySharedCacheTableLock(), setSharedCacheTableLock(),
** and clearAllSharedCacheTableLocks()
** manipulate entries in the BtShared.pLock linked list used to store
** shared-cache table level locks. If the library is compiled with the
** shared-cache feature disabled, then there is only ever one user
** of each BtShared structure and so this locking is not necessary.
** So define the lock related functions as no-ops.
*/
#define querySharedCacheTableLock(a,b,c) SQLITE_OK
#define setSharedCacheTableLock(a,b,c) SQLITE_OK
#define clearAllSharedCacheTableLocks(a)
#define downgradeAllSharedCacheTableLocks(a)
#define hasSharedCacheTableLock(a,b,c,d) 1
#define hasReadConflicts(a, b) 0
#endif
/*
** Implementation of the SQLITE_CORRUPT_PAGE() macro. Takes a single
** (MemPage*) as an argument. The (MemPage*) must not be NULL.
**
** If SQLITE_DEBUG is not defined, then this macro is equivalent to
** SQLITE_CORRUPT_BKPT. Or, if SQLITE_DEBUG is set, then the log message
** normally produced as a side-effect of SQLITE_CORRUPT_BKPT is augmented
** with the page number and filename associated with the (MemPage*).
*/
#ifdef SQLITE_DEBUG
int corruptPageError(int lineno, MemPage *p){
char *zMsg;
sqlite3BeginBenignMalloc();
zMsg = sqlite3_mprintf("database corruption page %d of %s",
(int)p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0)
);
sqlite3EndBenignMalloc();
if( zMsg ){
sqlite3ReportError(SQLITE_CORRUPT, lineno, zMsg);
}
sqlite3_free(zMsg);
return SQLITE_CORRUPT_BKPT;
}
# define SQLITE_CORRUPT_PAGE(pMemPage) corruptPageError(__LINE__, pMemPage)
#else
# define SQLITE_CORRUPT_PAGE(pMemPage) SQLITE_CORRUPT_PGNO(pMemPage->pgno)
#endif
#ifndef SQLITE_OMIT_SHARED_CACHE
#ifdef SQLITE_DEBUG
/*
**** This function is only used as part of an assert() statement. ***
**
** Check to see if pBtree holds the required locks to read or write to the
** table with root page iRoot. Return 1 if it does and 0 if not.
**
** For example, when writing to a table with root-page iRoot via
** Btree connection pBtree:
**
** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) );
**
** When writing to an index that resides in a sharable database, the
** caller should have first obtained a lock specifying the root page of
** the corresponding table. This makes things a bit more complicated,
** as this module treats each table as a separate structure. To determine
** the table corresponding to the index being written, this
** function has to search through the database schema.
**
** Instead of a lock on the table/index rooted at page iRoot, the caller may
** hold a write-lock on the schema table (root page 1). This is also
** acceptable.
*/
static int hasSharedCacheTableLock(
Btree *pBtree, /* Handle that must hold lock */
Pgno iRoot, /* Root page of b-tree */
int isIndex, /* True if iRoot is the root of an index b-tree */
int eLockType /* Required lock type (READ_LOCK or WRITE_LOCK) */
){
Schema *pSchema = (Schema *)pBtree->pBt->pSchema;
Pgno iTab = 0;
BtLock *pLock;
/* If this database is not shareable, or if the client is reading
** and has the read-uncommitted flag set, then no lock is required.
** Return true immediately.
*/
if( (pBtree->sharable==0)
|| (eLockType==READ_LOCK && (pBtree->db->flags & SQLITE_ReadUncommit))
){
return 1;
}
/* If the client is reading or writing an index and the schema is
** not loaded, then it is too difficult to actually check to see if
** the correct locks are held. So do not bother - just return true.
** This case does not come up very often anyhow.
*/
if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){
return 1;
}
/* Figure out the root-page that the lock should be held on. For table
** b-trees, this is just the root page of the b-tree being read or
** written. For index b-trees, it is the root page of the associated
** table. */
if( isIndex ){
HashElem *p;
for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){
Index *pIdx = (Index *)sqliteHashData(p);
if( pIdx->tnum==(int)iRoot ){
if( iTab ){
/* Two or more indexes share the same root page. There must
** be imposter tables. So just return true. The assert is not
** useful in that case. */
return 1;
}
iTab = pIdx->pTable->tnum;
}
}
}else{
iTab = iRoot;
}
/* Search for the required lock. Either a write-lock on root-page iTab, a
** write-lock on the schema table, or (if the client is reading) a
** read-lock on iTab will suffice. Return 1 if any of these are found. */
for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){
if( pLock->pBtree==pBtree
&& (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1))
&& pLock->eLock>=eLockType
){
return 1;
}
}
/* Failed to find the required lock. */
return 0;
}
#endif /* SQLITE_DEBUG */
#ifdef SQLITE_DEBUG
/*
**** This function may be used as part of assert() statements only. ****
**
** Return true if it would be illegal for pBtree to write into the
** table or index rooted at iRoot because other shared connections are
** simultaneously reading that same table or index.
**
** It is illegal for pBtree to write if some other Btree object that
** shares the same BtShared object is currently reading or writing
** the iRoot table. Except, if the other Btree object has the
** read-uncommitted flag set, then it is OK for the other object to
** have a read cursor.
**
** For example, before writing to any part of the table or index
** rooted at page iRoot, one should call:
**
** assert( !hasReadConflicts(pBtree, iRoot) );
*/
static int hasReadConflicts(Btree *pBtree, Pgno iRoot){
BtCursor *p;
for(p=pBtree->pBt->pCursor; p; p=p->pNext){
if( p->pgnoRoot==iRoot
&& p->pBtree!=pBtree
&& 0==(p->pBtree->db->flags & SQLITE_ReadUncommit)
){
return 1;
}
}
return 0;
}
#endif /* #ifdef SQLITE_DEBUG */
/*
** Query to see if Btree handle p may obtain a lock of type eLock
** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
** SQLITE_OK if the lock may be obtained (by calling
** setSharedCacheTableLock()), or SQLITE_LOCKED if not.
*/
static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
BtShared *pBt = p->pBt;
BtLock *pIter;
assert( sqlite3BtreeHoldsMutex(p) );
assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
assert( p->db!=0 );
assert( !(p->db->flags&SQLITE_ReadUncommit)||eLock==WRITE_LOCK||iTab==1 );
/* If requesting a write-lock, then the Btree must have an open write
** transaction on this file. And, obviously, for this to be so there
** must be an open write transaction on the file itself.
*/
assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) );
assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE );
/* This routine is a no-op if the shared-cache is not enabled */
if( !p->sharable ){
return SQLITE_OK;
}
/* If some other connection is holding an exclusive lock, the
** requested lock may not be obtained.
*/
if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){
sqlite3ConnectionBlocked(p->db, pBt->pWriter->db);
return SQLITE_LOCKED_SHAREDCACHE;
}
for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
/* The condition (pIter->eLock!=eLock) in the following if(...)
** statement is a simplification of:
**
** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK)
**
** since we know that if eLock==WRITE_LOCK, then no other connection
** may hold a WRITE_LOCK on any table in this file (since there can
** only be a single writer).
*/
assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK );
assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK);
if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){
sqlite3ConnectionBlocked(p->db, pIter->pBtree->db);
if( eLock==WRITE_LOCK ){
assert( p==pBt->pWriter );
pBt->btsFlags |= BTS_PENDING;
}
return SQLITE_LOCKED_SHAREDCACHE;
}
}
return SQLITE_OK;
}
#endif /* !SQLITE_OMIT_SHARED_CACHE */
#ifndef SQLITE_OMIT_SHARED_CACHE
/*
** Add a lock on the table with root-page iTable to the shared-btree used
** by Btree handle p. Parameter eLock must be either READ_LOCK or
** WRITE_LOCK.
**
** This function assumes the following:
**
** (a) The specified Btree object p is connected to a sharable
** database (one with the BtShared.sharable flag set), and
**
** (b) No other Btree objects hold a lock that conflicts
** with the requested lock (i.e. querySharedCacheTableLock() has
** already been called and returned SQLITE_OK).
**
** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM
** is returned if a malloc attempt fails.
*/
static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){
BtShared *pBt = p->pBt;
BtLock *pLock = 0;
BtLock *pIter;
assert( sqlite3BtreeHoldsMutex(p) );
assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
assert( p->db!=0 );
/* A connection with the read-uncommitted flag set will never try to
** obtain a read-lock using this function. The only read-lock obtained
** by a connection in read-uncommitted mode is on the sqlite_master
** table, and that lock is obtained in BtreeBeginTrans(). */
assert( 0==(p->db->flags&SQLITE_ReadUncommit) || eLock==WRITE_LOCK );
/* This function should only be called on a sharable b-tree after it
** has been determined that no other b-tree holds a conflicting lock. */
assert( p->sharable );
assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) );
/* First search the list for an existing lock on this table. */
for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
if( pIter->iTable==iTable && pIter->pBtree==p ){
pLock = pIter;
break;
}
}
/* If the above search did not find a BtLock struct associating Btree p
** with table iTable, allocate one and link it into the list.
*/
if( !pLock ){
pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
if( !pLock ){
return SQLITE_NOMEM_BKPT;
}
pLock->iTable = iTable;
pLock->pBtree = p;
pLock->pNext = pBt->pLock;
pBt->pLock = pLock;
}
/* Set the BtLock.eLock variable to the maximum of the current lock
** and the requested lock. This means if a write-lock was already held
** and a read-lock requested, we don't incorrectly downgrade the lock.
*/
assert( WRITE_LOCK>READ_LOCK );
if( eLock>pLock->eLock ){
pLock->eLock = eLock;
}
return SQLITE_OK;
}
#endif /* !SQLITE_OMIT_SHARED_CACHE */
#ifndef SQLITE_OMIT_SHARED_CACHE
/*
** Release all the table locks (locks obtained via calls to
** the setSharedCacheTableLock() procedure) held by Btree object p.
**
** This function assumes that Btree p has an open read or write
** transaction. If it does not, then the BTS_PENDING flag
** may be incorrectly cleared.
*/
static void clearAllSharedCacheTableLocks(Btree *p){
BtShared *pBt = p->pBt;
BtLock **ppIter = &pBt->pLock;
assert( sqlite3BtreeHoldsMutex(p) );
assert( p->sharable || 0==*ppIter );
assert( p->inTrans>0 );
while( *ppIter ){
BtLock *pLock = *ppIter;
assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree );
assert( pLock->pBtree->inTrans>=pLock->eLock );
if( pLock->pBtree==p ){
*ppIter = pLock->pNext;
assert( pLock->iTable!=1 || pLock==&p->lock );
if( pLock->iTable!=1 ){
sqlite3_free(pLock);
}
}else{
ppIter = &pLock->pNext;
}
}
assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter );
if( pBt->pWriter==p ){
pBt->pWriter = 0;
pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
}else if( pBt->nTransaction==2 ){
/* This function is called when Btree p is concluding its
** transaction. If there currently exists a writer, and p is not
** that writer, then the number of locks held by connections other
** than the writer must be about to drop to zero. In this case
** set the BTS_PENDING flag to 0.
**
** If there is not currently a writer, then BTS_PENDING must
** be zero already. So this next line is harmless in that case.
*/
pBt->btsFlags &= ~BTS_PENDING;
}
}
/*
** This function changes all write-locks held by Btree p into read-locks.
*/
static void downgradeAllSharedCacheTableLocks(Btree *p){
BtShared *pBt = p->pBt;
if( pBt->pWriter==p ){
BtLock *pLock;
pBt->pWriter = 0;
pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){
assert( pLock->eLock==READ_LOCK || pLock->pBtree==p );
pLock->eLock = READ_LOCK;
}
}
}
#endif /* SQLITE_OMIT_SHARED_CACHE */
static void releasePage(MemPage *pPage); /* Forward reference */
static void releasePageOne(MemPage *pPage); /* Forward reference */
static void releasePageNotNull(MemPage *pPage); /* Forward reference */
/*
***** This routine is used inside of assert() only ****
**
** Verify that the cursor holds the mutex on its BtShared
*/
#ifdef SQLITE_DEBUG
static int cursorHoldsMutex(BtCursor *p){
return sqlite3_mutex_held(p->pBt->mutex);
}
/* Verify that the cursor and the BtShared agree about what is the current
** database connetion. This is important in shared-cache mode. If the database
** connection pointers get out-of-sync, it is possible for routines like
** btreeInitPage() to reference an stale connection pointer that references a
** a connection that has already closed. This routine is used inside assert()
** statements only and for the purpose of double-checking that the btree code
** does keep the database connection pointers up-to-date.
*/
static int cursorOwnsBtShared(BtCursor *p){
assert( cursorHoldsMutex(p) );
return (p->pBtree->db==p->pBt->db);
}
#endif
/*
** Invalidate the overflow cache of the cursor passed as the first argument.
** on the shared btree structure pBt.
*/
#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl)
/*
** Invalidate the overflow page-list cache for all cursors opened
** on the shared btree structure pBt.
*/
static void invalidateAllOverflowCache(BtShared *pBt){
BtCursor *p;
assert( sqlite3_mutex_held(pBt->mutex) );
for(p=pBt->pCursor; p; p=p->pNext){
invalidateOverflowCache(p);
}
}
#ifndef SQLITE_OMIT_INCRBLOB
/*
** This function is called before modifying the contents of a table
** to invalidate any incrblob cursors that are open on the
** row or one of the rows being modified.
**
** If argument isClearTable is true, then the entire contents of the
** table is about to be deleted. In this case invalidate all incrblob
** cursors open on any row within the table with root-page pgnoRoot.
**
** Otherwise, if argument isClearTable is false, then the row with
** rowid iRow is being replaced or deleted. In this case invalidate
** only those incrblob cursors open on that specific row.
*/
static void invalidateIncrblobCursors(
Btree *pBtree, /* The database file to check */
Pgno pgnoRoot, /* The table that might be changing */
i64 iRow, /* The rowid that might be changing */
int isClearTable /* True if all rows are being deleted */
){
BtCursor *p;
if( pBtree->hasIncrblobCur==0 ) return;
assert( sqlite3BtreeHoldsMutex(pBtree) );
pBtree->hasIncrblobCur = 0;
for(p=pBtree->pBt->pCursor; p; p=p->pNext){
if( (p->curFlags & BTCF_Incrblob)!=0 ){
pBtree->hasIncrblobCur = 1;
if( p->pgnoRoot==pgnoRoot && (isClearTable || p->info.nKey==iRow) ){
p->eState = CURSOR_INVALID;
}
}
}
}
#else
/* Stub function when INCRBLOB is omitted */
#define invalidateIncrblobCursors(w,x,y,z)
#endif /* SQLITE_OMIT_INCRBLOB */
/*
** Set bit pgno of the BtShared.pHasContent bitvec. This is called
** when a page that previously contained data becomes a free-list leaf
** page.
**
** The BtShared.pHasContent bitvec exists to work around an obscure
** bug caused by the interaction of two useful IO optimizations surrounding
** free-list leaf pages:
**
** 1) When all data is deleted from a page and the page becomes
** a free-list leaf page, the page is not written to the database
** (as free-list leaf pages contain no meaningful data). Sometimes
** such a page is not even journalled (as it will not be modified,
** why bother journalling it?).
**
** 2) When a free-list leaf page is reused, its content is not read
** from the database or written to the journal file (why should it
** be, if it is not at all meaningful?).
**
** By themselves, these optimizations work fine and provide a handy
** performance boost to bulk delete or insert operations. However, if
** a page is moved to the free-list and then reused within the same
** transaction, a problem comes up. If the page is not journalled when
** it is moved to the free-list and it is also not journalled when it
** is extracted from the free-list and reused, then the original data
** may be lost. In the event of a rollback, it may not be possible
** to restore the database to its original configuration.
**
** The solution is the BtShared.pHasContent bitvec. Whenever a page is
** moved to become a free-list leaf page, the corresponding bit is
** set in the bitvec. Whenever a leaf page is extracted from the free-list,
** optimization 2 above is omitted if the corresponding bit is already
** set in BtShared.pHasContent. The contents of the bitvec are cleared
** at the end of every transaction.
*/
static int btreeSetHasContent(BtShared *pBt, Pgno pgno){
int rc = SQLITE_OK;
if( !pBt->pHasContent ){
assert( pgno<=pBt->nPage );
pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage);
if( !pBt->pHasContent ){
rc = SQLITE_NOMEM_BKPT;
}
}
if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){
rc = sqlite3BitvecSet(pBt->pHasContent, pgno);
}
return rc;
}
/*
** Query the BtShared.pHasContent vector.
**
** This function is called when a free-list leaf page is removed from the
** free-list for reuse. It returns false if it is safe to retrieve the
** page from the pager layer with the 'no-content' flag set. True otherwise.
*/
static int btreeGetHasContent(BtShared *pBt, Pgno pgno){
Bitvec *p = pBt->pHasContent;
return (p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTest(p, pgno)));
}
/*
** Clear (destroy) the BtShared.pHasContent bitvec. This should be
** invoked at the conclusion of each write-transaction.
*/
static void btreeClearHasContent(BtShared *pBt){
sqlite3BitvecDestroy(pBt->pHasContent);
pBt->pHasContent = 0;
}
/*
** Release all of the apPage[] pages for a cursor.
*/
static void btreeReleaseAllCursorPages(BtCursor *pCur){
int i;
if( pCur->iPage>=0 ){
for(i=0; i<pCur->iPage; i++){
releasePageNotNull(pCur->apPage[i]);
}
releasePageNotNull(pCur->pPage);
pCur->iPage = -1;
}
}
/*
** The cursor passed as the only argument must point to a valid entry
** when this function is called (i.e. have eState==CURSOR_VALID). This
** function saves the current cursor key in variables pCur->nKey and
** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error
** code otherwise.
**
** If the cursor is open on an intkey table, then the integer key
** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to
** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is
** set to point to a malloced buffer pCur->nKey bytes in size containing
** the key.
*/
static int saveCursorKey(BtCursor *pCur){
int rc = SQLITE_OK;
assert( CURSOR_VALID==pCur->eState );
assert( 0==pCur->pKey );
assert( cursorHoldsMutex(pCur) );
if( pCur->curIntKey ){
/* Only the rowid is required for a table btree */
pCur->nKey = sqlite3BtreeIntegerKey(pCur);
}else{
/* For an index btree, save the complete key content. It is possible
** that the current key is corrupt. In that case, it is possible that
** the sqlite3VdbeRecordUnpack() function may overread the buffer by
** up to the size of 1 varint plus 1 8-byte value when the cursor
** position is restored. Hence the 17 bytes of padding allocated
** below. */
void *pKey;
pCur->nKey = sqlite3BtreePayloadSize(pCur);
pKey = sqlite3Malloc( pCur->nKey + 9 + 8 );
if( pKey ){
rc = sqlite3BtreePayload(pCur, 0, (int)pCur->nKey, pKey);
if( rc==SQLITE_OK ){
memset(((u8*)pKey)+pCur->nKey, 0, 9+8);
pCur->pKey = pKey;
}else{
sqlite3_free(pKey);
}
}else{
rc = SQLITE_NOMEM_BKPT;
}
}
assert( !pCur->curIntKey || !pCur->pKey );
return rc;
}
/*
** Save the current cursor position in the variables BtCursor.nKey
** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
**
** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID)
** prior to calling this routine.
*/
static int saveCursorPosition(BtCursor *pCur){
int rc;
assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState );
assert( 0==pCur->pKey );
assert( cursorHoldsMutex(pCur) );
if( pCur->eState==CURSOR_SKIPNEXT ){
pCur->eState = CURSOR_VALID;
}else{
pCur->skipNext = 0;
}
rc = saveCursorKey(pCur);
if( rc==SQLITE_OK ){
btreeReleaseAllCursorPages(pCur);
pCur->eState = CURSOR_REQUIRESEEK;
}
pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast);
return rc;
}
/* Forward reference */
static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*);
/*
** Save the positions of all cursors (except pExcept) that are open on
** the table with root-page iRoot. "Saving the cursor position" means that
** the location in the btree is remembered in such a way that it can be
** moved back to the same spot after the btree has been modified. This
** routine is called just before cursor pExcept is used to modify the
** table, for example in BtreeDelete() or BtreeInsert().
**
** If there are two or more cursors on the same btree, then all such
** cursors should have their BTCF_Multiple flag set. The btreeCursor()
** routine enforces that rule. This routine only needs to be called in
** the uncommon case when pExpect has the BTCF_Multiple flag set.
**
** If pExpect!=NULL and if no other cursors are found on the same root-page,
** then the BTCF_Multiple flag on pExpect is cleared, to avoid another
** pointless call to this routine.
**
** Implementation note: This routine merely checks to see if any cursors
** need to be saved. It calls out to saveCursorsOnList() in the (unusual)
** event that cursors are in need to being saved.
*/
static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
BtCursor *p;
assert( sqlite3_mutex_held(pBt->mutex) );
assert( pExcept==0 || pExcept->pBt==pBt );
for(p=pBt->pCursor; p; p=p->pNext){
if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break;
}
if( p ) return saveCursorsOnList(p, iRoot, pExcept);
if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple;
return SQLITE_OK;
}
/* This helper routine to saveAllCursors does the actual work of saving
** the cursors if and when a cursor is found that actually requires saving.
** The common case is that no cursors need to be saved, so this routine is
** broken out from its caller to avoid unnecessary stack pointer movement.
*/
static int SQLITE_NOINLINE saveCursorsOnList(
BtCursor *p, /* The first cursor that needs saving */
Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */
BtCursor *pExcept /* Do not save this cursor */
){
do{
if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){
if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
int rc = saveCursorPosition(p);
if( SQLITE_OK!=rc ){
return rc;
}
}else{
testcase( p->iPage>=0 );
btreeReleaseAllCursorPages(p);
}
}
p = p->pNext;
}while( p );
return SQLITE_OK;
}
/*
** Clear the current cursor position.
*/
void sqlite3BtreeClearCursor(BtCursor *pCur){
assert( cursorHoldsMutex(pCur) );
sqlite3_free(pCur->pKey);
pCur->pKey = 0;
pCur->eState = CURSOR_INVALID;
}
/*
** In this version of BtreeMoveto, pKey is a packed index record
** such as is generated by the OP_MakeRecord opcode. Unpack the
** record and then call BtreeMovetoUnpacked() to do the work.
*/
static int btreeMoveto(
BtCursor *pCur, /* Cursor open on the btree to be searched */
const void *pKey, /* Packed key if the btree is an index */
i64 nKey, /* Integer key for tables. Size of pKey for indices */
int bias, /* Bias search to the high end */
int *pRes /* Write search results here */
){
int rc; /* Status code */
UnpackedRecord *pIdxKey; /* Unpacked index key */
if( pKey ){
KeyInfo *pKeyInfo = pCur->pKeyInfo;
assert( nKey==(i64)(int)nKey );
pIdxKey = sqlite3VdbeAllocUnpackedRecord(pKeyInfo);
if( pIdxKey==0 ) return SQLITE_NOMEM_BKPT;
sqlite3VdbeRecordUnpack(pKeyInfo, (int)nKey, pKey, pIdxKey);
if( pIdxKey->nField==0 || pIdxKey->nField>pKeyInfo->nAllField ){
rc = SQLITE_CORRUPT_BKPT;
goto moveto_done;
}
}else{
pIdxKey = 0;
}
rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes);
moveto_done:
if( pIdxKey ){
sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey);
}
return rc;
}
/*
** Restore the cursor to the position it was in (or as close to as possible)
** when saveCursorPosition() was called. Note that this call deletes the
** saved position info stored by saveCursorPosition(), so there can be
** at most one effective restoreCursorPosition() call after each
** saveCursorPosition().
*/
static int btreeRestoreCursorPosition(BtCursor *pCur){
int rc;
int skipNext = 0;
assert( cursorOwnsBtShared(pCur) );
assert( pCur->eState>=CURSOR_REQUIRESEEK );
if( pCur->eState==CURSOR_FAULT ){
return pCur->skipNext;
}
pCur->eState = CURSOR_INVALID;
if( sqlite3FaultSim(410) ){
rc = SQLITE_IOERR;
}else{
rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext);
}
if( rc==SQLITE_OK ){
sqlite3_free(pCur->pKey);
pCur->pKey = 0;
assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
if( skipNext ) pCur->skipNext = skipNext;
if( pCur->skipNext && pCur->eState==CURSOR_VALID ){
pCur->eState = CURSOR_SKIPNEXT;
}
}
return rc;
}
#define restoreCursorPosition(p) \
(p->eState>=CURSOR_REQUIRESEEK ? \
btreeRestoreCursorPosition(p) : \
SQLITE_OK)
/*
** Determine whether or not a cursor has moved from the position where
** it was last placed, or has been invalidated for any other reason.
** Cursors can move when the row they are pointing at is deleted out
** from under them, for example. Cursor might also move if a btree
** is rebalanced.
**
** Calling this routine with a NULL cursor pointer returns false.
**
** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor
** back to where it ought to be if this routine returns true.
*/
int sqlite3BtreeCursorHasMoved(BtCursor *pCur){
assert( EIGHT_BYTE_ALIGNMENT(pCur)
|| pCur==sqlite3BtreeFakeValidCursor() );
assert( offsetof(BtCursor, eState)==0 );
assert( sizeof(pCur->eState)==1 );
return CURSOR_VALID != *(u8*)pCur;
}
/*
** Return a pointer to a fake BtCursor object that will always answer
** false to the sqlite3BtreeCursorHasMoved() routine above. The fake
** cursor returned must not be used with any other Btree interface.
*/
BtCursor *sqlite3BtreeFakeValidCursor(void){
static u8 fakeCursor = CURSOR_VALID;
assert( offsetof(BtCursor, eState)==0 );
return (BtCursor*)&fakeCursor;
}
/*
** This routine restores a cursor back to its original position after it
** has been moved by some outside activity (such as a btree rebalance or
** a row having been deleted out from under the cursor).
**
** On success, the *pDifferentRow parameter is false if the cursor is left
** pointing at exactly the same row. *pDifferntRow is the row the cursor
** was pointing to has been deleted, forcing the cursor to point to some
** nearby row.
**
** This routine should only be called for a cursor that just returned
** TRUE from sqlite3BtreeCursorHasMoved().
*/
int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){
int rc;
assert( pCur!=0 );
assert( pCur->eState!=CURSOR_VALID );
rc = restoreCursorPosition(pCur);
if( rc ){
*pDifferentRow = 1;
return rc;
}
if( pCur->eState!=CURSOR_VALID ){
*pDifferentRow = 1;
}else{
*pDifferentRow = 0;
}
return SQLITE_OK;
}
#ifdef SQLITE_ENABLE_CURSOR_HINTS
/*
** Provide hints to the cursor. The particular hint given (and the type
** and number of the varargs parameters) is determined by the eHintType
** parameter. See the definitions of the BTREE_HINT_* macros for details.
*/
void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){
/* Used only by system that substitute their own storage engine */
}
#endif
/*
** Provide flag hints to the cursor.
*/
void sqlite3BtreeCursorHintFlags(BtCursor *pCur, unsigned x){
assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 );
pCur->hints = x;
}
#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** Given a page number of a regular database page, return the page
** number for the pointer-map page that contains the entry for the
** input page number.
**
** Return 0 (not a valid page) for pgno==1 since there is
** no pointer map associated with page 1. The integrity_check logic
** requires that ptrmapPageno(*,1)!=1.
*/
static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
int nPagesPerMapPage;
Pgno iPtrMap, ret;
assert( sqlite3_mutex_held(pBt->mutex) );
if( pgno<2 ) return 0;
nPagesPerMapPage = (pBt->usableSize/5)+1;
iPtrMap = (pgno-2)/nPagesPerMapPage;
ret = (iPtrMap*nPagesPerMapPage) + 2;
if( ret==PENDING_BYTE_PAGE(pBt) ){
ret++;
}
return ret;
}
/*
** Write an entry into the pointer map.
**
** This routine updates the pointer map entry for page number 'key'
** so that it maps to type 'eType' and parent page number 'pgno'.
**
** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is
** a no-op. If an error occurs, the appropriate error code is written
** into *pRC.
*/
static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){
DbPage *pDbPage; /* The pointer map page */
u8 *pPtrmap; /* The pointer map data */
Pgno iPtrmap; /* The pointer map page number */
int offset; /* Offset in pointer map page */
int rc; /* Return code from subfunctions */
if( *pRC ) return;
assert( sqlite3_mutex_held(pBt->mutex) );
/* The master-journal page number must never be used as a pointer map page */
assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
assert( pBt->autoVacuum );
if( key==0 ){
*pRC = SQLITE_CORRUPT_BKPT;
return;
}
iPtrmap = PTRMAP_PAGENO(pBt, key);
rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0);
if( rc!=SQLITE_OK ){
*pRC = rc;
return;
}
if( ((char*)sqlite3PagerGetExtra(pDbPage))[0]!=0 ){
/* The first byte of the extra data is the MemPage.isInit byte.
** If that byte is set, it means this page is also being used
** as a btree page. */
*pRC = SQLITE_CORRUPT_BKPT;
goto ptrmap_exit;
}
offset = PTRMAP_PTROFFSET(iPtrmap, key);
if( offset<0 ){
*pRC = SQLITE_CORRUPT_BKPT;
goto ptrmap_exit;
}
assert( offset <= (int)pBt->usableSize-5 );
pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
*pRC= rc = sqlite3PagerWrite(pDbPage);
if( rc==SQLITE_OK ){
pPtrmap[offset] = eType;
put4byte(&pPtrmap[offset+1], parent);
}
}
ptrmap_exit:
sqlite3PagerUnref(pDbPage);
}
/*
** Read an entry from the pointer map.
**
** This routine retrieves the pointer map entry for page 'key', writing
** the type and parent page number to *pEType and *pPgno respectively.
** An error code is returned if something goes wrong, otherwise SQLITE_OK.
*/
static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
DbPage *pDbPage; /* The pointer map page */
int iPtrmap; /* Pointer map page index */
u8 *pPtrmap; /* Pointer map page data */
int offset; /* Offset of entry in pointer map */
int rc;
assert( sqlite3_mutex_held(pBt->mutex) );
iPtrmap = PTRMAP_PAGENO(pBt, key);
rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0);
if( rc!=0 ){
return rc;
}
pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
offset = PTRMAP_PTROFFSET(iPtrmap, key);
if( offset<0 ){
sqlite3PagerUnref(pDbPage);
return SQLITE_CORRUPT_BKPT;
}
assert( offset <= (int)pBt->usableSize-5 );
assert( pEType!=0 );
*pEType = pPtrmap[offset];
if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
sqlite3PagerUnref(pDbPage);
if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_PGNO(iPtrmap);
return SQLITE_OK;
}
#else /* if defined SQLITE_OMIT_AUTOVACUUM */
#define ptrmapPut(w,x,y,z,rc)
#define ptrmapGet(w,x,y,z) SQLITE_OK
#define ptrmapPutOvflPtr(x, y, z, rc)
#endif
/*
** Given a btree page and a cell index (0 means the first cell on
** the page, 1 means the second cell, and so forth) return a pointer
** to the cell content.
**
** findCellPastPtr() does the same except it skips past the initial
** 4-byte child pointer found on interior pages, if there is one.
**
** This routine works only for pages that do not contain overflow cells.
*/
#define findCell(P,I) \
((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
#define findCellPastPtr(P,I) \
((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
/*
** This is common tail processing for btreeParseCellPtr() and
** btreeParseCellPtrIndex() for the case when the cell does not fit entirely
** on a single B-tree page. Make necessary adjustments to the CellInfo
** structure.
*/
static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow(
MemPage *pPage, /* Page containing the cell */
u8 *pCell, /* Pointer to the cell text. */
CellInfo *pInfo /* Fill in this structure */
){
/* If the payload will not fit completely on the local page, we have
** to decide how much to store locally and how much to spill onto
** overflow pages. The strategy is to minimize the amount of unused
** space on overflow pages while keeping the amount of local storage
** in between minLocal and maxLocal.
**
** Warning: changing the way overflow payload is distributed in any
** way will result in an incompatible file format.
*/
int minLocal; /* Minimum amount of payload held locally */
int maxLocal; /* Maximum amount of payload held locally */
int surplus; /* Overflow payload available for local storage */
minLocal = pPage->minLocal;
maxLocal = pPage->maxLocal;
surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4);
testcase( surplus==maxLocal );
testcase( surplus==maxLocal+1 );
if( surplus <= maxLocal ){
pInfo->nLocal = (u16)surplus;
}else{
pInfo->nLocal = (u16)minLocal;
}
pInfo->nSize = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell) + 4;
}
/*
** The following routines are implementations of the MemPage.xParseCell()
** method.
**
** Parse a cell content block and fill in the CellInfo structure.
**
** btreeParseCellPtr() => table btree leaf nodes
** btreeParseCellNoPayload() => table btree internal nodes
** btreeParseCellPtrIndex() => index btree nodes
**
** There is also a wrapper function btreeParseCell() that works for
** all MemPage types and that references the cell by index rather than
** by pointer.
*/
static void btreeParseCellPtrNoPayload(
MemPage *pPage, /* Page containing the cell */
u8 *pCell, /* Pointer to the cell text. */
CellInfo *pInfo /* Fill in this structure */
){
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->leaf==0 );
assert( pPage->childPtrSize==4 );
#ifndef SQLITE_DEBUG
UNUSED_PARAMETER(pPage);
#endif
pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey);
pInfo->nPayload = 0;
pInfo->nLocal = 0;
pInfo->pPayload = 0;
return;
}
static void btreeParseCellPtr(
MemPage *pPage, /* Page containing the cell */
u8 *pCell, /* Pointer to the cell text. */
CellInfo *pInfo /* Fill in this structure */
){
u8 *pIter; /* For scanning through pCell */
u32 nPayload; /* Number of bytes of cell payload */
u64 iKey; /* Extracted Key value */
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->leaf==0 || pPage->leaf==1 );
assert( pPage->intKeyLeaf );
assert( pPage->childPtrSize==0 );
pIter = pCell;
/* The next block of code is equivalent to:
**
** pIter += getVarint32(pIter, nPayload);
**
** The code is inlined to avoid a function call.
*/
nPayload = *pIter;
if( nPayload>=0x80 ){
u8 *pEnd = &pIter[8];
nPayload &= 0x7f;
do{
nPayload = (nPayload<<7) | (*++pIter & 0x7f);
}while( (*pIter)>=0x80 && pIter<pEnd );
}
pIter++;
/* The next block of code is equivalent to:
**
** pIter += getVarint(pIter, (u64*)&pInfo->nKey);
**
** The code is inlined to avoid a function call.
*/
iKey = *pIter;
if( iKey>=0x80 ){
u8 *pEnd = &pIter[7];
iKey &= 0x7f;
while(1){
iKey = (iKey<<7) | (*++pIter & 0x7f);
if( (*pIter)<0x80 ) break;
if( pIter>=pEnd ){
iKey = (iKey<<8) | *++pIter;
break;
}
}
}
pIter++;
pInfo->nKey = *(i64*)&iKey;
pInfo->nPayload = nPayload;
pInfo->pPayload = pIter;
testcase( nPayload==pPage->maxLocal );
testcase( nPayload==pPage->maxLocal+1 );
if( nPayload<=pPage->maxLocal ){
/* This is the (easy) common case where the entire payload fits
** on the local page. No overflow is required.
*/
pInfo->nSize = nPayload + (u16)(pIter - pCell);
if( pInfo->nSize<4 ) pInfo->nSize = 4;
pInfo->nLocal = (u16)nPayload;
}else{
btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo);
}
}
static void btreeParseCellPtrIndex(
MemPage *pPage, /* Page containing the cell */
u8 *pCell, /* Pointer to the cell text. */
CellInfo *pInfo /* Fill in this structure */
){
u8 *pIter; /* For scanning through pCell */
u32 nPayload; /* Number of bytes of cell payload */
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->leaf==0 || pPage->leaf==1 );
assert( pPage->intKeyLeaf==0 );
pIter = pCell + pPage->childPtrSize;
nPayload = *pIter;
if( nPayload>=0x80 ){
u8 *pEnd = &pIter[8];
nPayload &= 0x7f;
do{
nPayload = (nPayload<<7) | (*++pIter & 0x7f);
}while( *(pIter)>=0x80 && pIter<pEnd );
}
pIter++;
pInfo->nKey = nPayload;
pInfo->nPayload = nPayload;
pInfo->pPayload = pIter;
testcase( nPayload==pPage->maxLocal );
testcase( nPayload==pPage->maxLocal+1 );
if( nPayload<=pPage->maxLocal ){
/* This is the (easy) common case where the entire payload fits
** on the local page. No overflow is required.
*/
pInfo->nSize = nPayload + (u16)(pIter - pCell);
if( pInfo->nSize<4 ) pInfo->nSize = 4;
pInfo->nLocal = (u16)nPayload;
}else{
btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo);
}
}
static void btreeParseCell(
MemPage *pPage, /* Page containing the cell */
int iCell, /* The cell index. First cell is 0 */
CellInfo *pInfo /* Fill in this structure */
){
pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo);
}
/*
** The following routines are implementations of the MemPage.xCellSize
** method.
**
** Compute the total number of bytes that a Cell needs in the cell
** data area of the btree-page. The return number includes the cell
** data header and the local payload, but not any overflow page or
** the space used by the cell pointer.
**
** cellSizePtrNoPayload() => table internal nodes
** cellSizePtr() => all index nodes & table leaf nodes
*/
static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */
u8 *pEnd; /* End mark for a varint */
u32 nSize; /* Size value to return */
#ifdef SQLITE_DEBUG
/* The value returned by this function should always be the same as
** the (CellInfo.nSize) value found by doing a full parse of the
** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
** this function verifies that this invariant is not violated. */
CellInfo debuginfo;
pPage->xParseCell(pPage, pCell, &debuginfo);
#endif
nSize = *pIter;
if( nSize>=0x80 ){
pEnd = &pIter[8];
nSize &= 0x7f;
do{
nSize = (nSize<<7) | (*++pIter & 0x7f);
}while( *(pIter)>=0x80 && pIter<pEnd );
}
pIter++;
if( pPage->intKey ){
/* pIter now points at the 64-bit integer key value, a variable length
** integer. The following block moves pIter to point at the first byte
** past the end of the key value. */
pEnd = &pIter[9];
while( (*pIter++)&0x80 && pIter<pEnd );
}
testcase( nSize==pPage->maxLocal );
testcase( nSize==pPage->maxLocal+1 );
if( nSize<=pPage->maxLocal ){
nSize += (u32)(pIter - pCell);
if( nSize<4 ) nSize = 4;
}else{
int minLocal = pPage->minLocal;
nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
testcase( nSize==pPage->maxLocal );
testcase( nSize==pPage->maxLocal+1 );
if( nSize>pPage->maxLocal ){
nSize = minLocal;
}
nSize += 4 + (u16)(pIter - pCell);
}
assert( nSize==debuginfo.nSize || CORRUPT_DB );
return (u16)nSize;
}
static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){
u8 *pIter = pCell + 4; /* For looping over bytes of pCell */
u8 *pEnd; /* End mark for a varint */
#ifdef SQLITE_DEBUG
/* The value returned by this function should always be the same as
** the (CellInfo.nSize) value found by doing a full parse of the
** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
** this function verifies that this invariant is not violated. */
CellInfo debuginfo;
pPage->xParseCell(pPage, pCell, &debuginfo);
#else
UNUSED_PARAMETER(pPage);
#endif
assert( pPage->childPtrSize==4 );
pEnd = pIter + 9;
while( (*pIter++)&0x80 && pIter<pEnd );
assert( debuginfo.nSize==(u16)(pIter - pCell) || CORRUPT_DB );
return (u16)(pIter - pCell);
}
#ifdef SQLITE_DEBUG
/* This variation on cellSizePtr() is used inside of assert() statements
** only. */
static u16 cellSize(MemPage *pPage, int iCell){
return pPage->xCellSize(pPage, findCell(pPage, iCell));
}
#endif
#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** The cell pCell is currently part of page pSrc but will ultimately be part
** of pPage. (pSrc and pPager are often the same.) If pCell contains a
** pointer to an overflow page, insert an entry into the pointer-map for
** the overflow page that will be valid after pCell has been moved to pPage.
*/
static void ptrmapPutOvflPtr(MemPage *pPage, MemPage *pSrc, u8 *pCell,int *pRC){
CellInfo info;
if( *pRC ) return;
assert( pCell!=0 );
pPage->xParseCell(pPage, pCell, &info);
if( info.nLocal<info.nPayload ){
Pgno ovfl;
if( SQLITE_WITHIN(pSrc->aDataEnd, pCell, pCell+info.nLocal) ){
testcase( pSrc!=pPage );
*pRC = SQLITE_CORRUPT_BKPT;
return;
}
ovfl = get4byte(&pCell[info.nSize-4]);
ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC);
}
}
#endif
/*
** Defragment the page given. This routine reorganizes cells within the
** page so that there are no free-blocks on the free-block list.
**
** Parameter nMaxFrag is the maximum amount of fragmented space that may be
** present in the page after this routine returns.
**
** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a
** b-tree page so that there are no freeblocks or fragment bytes, all
** unused bytes are contained in the unallocated space region, and all
** cells are packed tightly at the end of the page.
*/
static int defragmentPage(MemPage *pPage, int nMaxFrag){
int i; /* Loop counter */
int pc; /* Address of the i-th cell */
int hdr; /* Offset to the page header */
int size; /* Size of a cell */
int usableSize; /* Number of usable bytes on a page */
int cellOffset; /* Offset to the cell pointer array */
int cbrk; /* Offset to the cell content area */
int nCell; /* Number of cells on the page */
unsigned char *data; /* The page data */
unsigned char *temp; /* Temp area for cell content */
unsigned char *src; /* Source of content */
int iCellFirst; /* First allowable cell index */
int iCellLast; /* Last possible cell index */
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( pPage->pBt!=0 );
assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
assert( pPage->nOverflow==0 );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
temp = 0;
src = data = pPage->aData;
hdr = pPage->hdrOffset;
cellOffset = pPage->cellOffset;
nCell = pPage->nCell;
assert( nCell==get2byte(&data[hdr+3]) || CORRUPT_DB );
iCellFirst = cellOffset + 2*nCell;
usableSize = pPage->pBt->usableSize;
/* This block handles pages with two or fewer free blocks and nMaxFrag
** or fewer fragmented bytes. In this case it is faster to move the
** two (or one) blocks of cells using memmove() and add the required
** offsets to each pointer in the cell-pointer array than it is to
** reconstruct the entire page. */
if( (int)data[hdr+7]<=nMaxFrag ){
int iFree = get2byte(&data[hdr+1]);
if( iFree>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage);
if( iFree ){
int iFree2 = get2byte(&data[iFree]);
if( iFree2>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage);
if( 0==iFree2 || (data[iFree2]==0 && data[iFree2+1]==0) ){
u8 *pEnd = &data[cellOffset + nCell*2];
u8 *pAddr;
int sz2 = 0;
int sz = get2byte(&data[iFree+2]);
int top = get2byte(&data[hdr+5]);
if( top>=iFree ){
return SQLITE_CORRUPT_PAGE(pPage);
}
if( iFree2 ){
if( iFree+sz>iFree2 ) return SQLITE_CORRUPT_PAGE(pPage);
sz2 = get2byte(&data[iFree2+2]);
if( iFree2+sz2 > usableSize ) return SQLITE_CORRUPT_PAGE(pPage);
memmove(&data[iFree+sz+sz2], &data[iFree+sz], iFree2-(iFree+sz));
sz += sz2;
}else if( iFree+sz>usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
cbrk = top+sz;
assert( cbrk+(iFree-top) <= usableSize );
memmove(&data[cbrk], &data[top], iFree-top);
for(pAddr=&data[cellOffset]; pAddr<pEnd; pAddr+=2){
pc = get2byte(pAddr);
if( pc<iFree ){ put2byte(pAddr, pc+sz); }
else if( pc<iFree2 ){ put2byte(pAddr, pc+sz2); }
}
goto defragment_out;
}
}
}
cbrk = usableSize;
iCellLast = usableSize - 4;
for(i=0; i<nCell; i++){
u8 *pAddr; /* The i-th cell pointer */
pAddr = &data[cellOffset + i*2];
pc = get2byte(pAddr);
testcase( pc==iCellFirst );
testcase( pc==iCellLast );
/* These conditions have already been verified in btreeInitPage()
** if PRAGMA cell_size_check=ON.
*/
if( pc<iCellFirst || pc>iCellLast ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( pc>=iCellFirst && pc<=iCellLast );
size = pPage->xCellSize(pPage, &src[pc]);
cbrk -= size;
if( cbrk<iCellFirst || pc+size>usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( cbrk+size<=usableSize && cbrk>=iCellFirst );
testcase( cbrk+size==usableSize );
testcase( pc+size==usableSize );
put2byte(pAddr, cbrk);
if( temp==0 ){
int x;
if( cbrk==pc ) continue;
temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
x = get2byte(&data[hdr+5]);
memcpy(&temp[x], &data[x], (cbrk+size) - x);
src = temp;
}
memcpy(&data[cbrk], &src[pc], size);
}
data[hdr+7] = 0;
defragment_out:
assert( pPage->nFree>=0 );
if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( cbrk>=iCellFirst );
put2byte(&data[hdr+5], cbrk);
data[hdr+1] = 0;
data[hdr+2] = 0;
memset(&data[iCellFirst], 0, cbrk-iCellFirst);
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
return SQLITE_OK;
}
/*
** Search the free-list on page pPg for space to store a cell nByte bytes in
** size. If one can be found, return a pointer to the space and remove it
** from the free-list.
**
** If no suitable space can be found on the free-list, return NULL.
**
** This function may detect corruption within pPg. If corruption is
** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned.
**
** Slots on the free list that are between 1 and 3 bytes larger than nByte
** will be ignored if adding the extra space to the fragmentation count
** causes the fragmentation count to exceed 60.
*/
static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
const int hdr = pPg->hdrOffset; /* Offset to page header */
u8 * const aData = pPg->aData; /* Page data */
int iAddr = hdr + 1; /* Address of ptr to pc */
int pc = get2byte(&aData[iAddr]); /* Address of a free slot */
int x; /* Excess size of the slot */
int maxPC = pPg->pBt->usableSize - nByte; /* Max address for a usable slot */
int size; /* Size of the free slot */
assert( pc>0 );
while( pc<=maxPC ){
/* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each
** freeblock form a big-endian integer which is the size of the freeblock
** in bytes, including the 4-byte header. */
size = get2byte(&aData[pc+2]);
if( (x = size - nByte)>=0 ){
testcase( x==4 );
testcase( x==3 );
if( x<4 ){
/* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total
** number of bytes in fragments may not exceed 60. */
if( aData[hdr+7]>57 ) return 0;
/* Remove the slot from the free-list. Update the number of
** fragmented bytes within the page. */
memcpy(&aData[iAddr], &aData[pc], 2);
aData[hdr+7] += (u8)x;
}else if( x+pc > maxPC ){
/* This slot extends off the end of the usable part of the page */
*pRc = SQLITE_CORRUPT_PAGE(pPg);
return 0;
}else{
/* The slot remains on the free-list. Reduce its size to account
** for the portion used by the new allocation. */
put2byte(&aData[pc+2], x);
}
return &aData[pc + x];
}
iAddr = pc;
pc = get2byte(&aData[pc]);
if( pc<=iAddr+size ){
if( pc ){
/* The next slot in the chain is not past the end of the current slot */
*pRc = SQLITE_CORRUPT_PAGE(pPg);
}
return 0;
}
}
if( pc>maxPC+nByte-4 ){
/* The free slot chain extends off the end of the page */
*pRc = SQLITE_CORRUPT_PAGE(pPg);
}
return 0;
}
/*
** Allocate nByte bytes of space from within the B-Tree page passed
** as the first argument. Write into *pIdx the index into pPage->aData[]
** of the first byte of allocated space. Return either SQLITE_OK or
** an error code (usually SQLITE_CORRUPT).
**
** The caller guarantees that there is sufficient space to make the
** allocation. This routine might need to defragment in order to bring
** all the space together, however. This routine will avoid using
** the first two bytes past the cell pointer area since presumably this
** allocation is being made in order to insert a new cell, so we will
** also end up needing a new cell pointer.
*/
static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */
u8 * const data = pPage->aData; /* Local cache of pPage->aData */
int top; /* First byte of cell content area */
int rc = SQLITE_OK; /* Integer return code */
int gap; /* First byte of gap between cell pointers and cell content */
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( pPage->pBt );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( nByte>=0 ); /* Minimum cell size is 4 */
assert( pPage->nFree>=nByte );
assert( pPage->nOverflow==0 );
assert( nByte < (int)(pPage->pBt->usableSize-8) );
assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
gap = pPage->cellOffset + 2*pPage->nCell;
assert( gap<=65536 );
/* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size
** and the reserved space is zero (the usual value for reserved space)
** then the cell content offset of an empty page wants to be 65536.
** However, that integer is too large to be stored in a 2-byte unsigned
** integer, so a value of 0 is used in its place. */
top = get2byte(&data[hdr+5]);
assert( top<=(int)pPage->pBt->usableSize ); /* by btreeComputeFreeSpace() */
if( gap>top ){
if( top==0 && pPage->pBt->usableSize==65536 ){
top = 65536;
}else{
return SQLITE_CORRUPT_PAGE(pPage);
}
}
/* If there is enough space between gap and top for one more cell pointer,
** and if the freelist is not empty, then search the
** freelist looking for a slot big enough to satisfy the request.
*/
testcase( gap+2==top );
testcase( gap+1==top );
testcase( gap==top );
if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){
u8 *pSpace = pageFindSlot(pPage, nByte, &rc);
if( pSpace ){
assert( pSpace+nByte<=data+pPage->pBt->usableSize );
if( (*pIdx = (int)(pSpace-data))<=gap ){
return SQLITE_CORRUPT_PAGE(pPage);
}else{
return SQLITE_OK;
}
}else if( rc ){
return rc;
}
}
/* The request could not be fulfilled using a freelist slot. Check
** to see if defragmentation is necessary.
*/
testcase( gap+2+nByte==top );
if( gap+2+nByte>top ){
assert( pPage->nCell>0 || CORRUPT_DB );
assert( pPage->nFree>=0 );
rc = defragmentPage(pPage, MIN(4, pPage->nFree - (2+nByte)));
if( rc ) return rc;
top = get2byteNotZero(&data[hdr+5]);
assert( gap+2+nByte<=top );
}
/* Allocate memory from the gap in between the cell pointer array
** and the cell content area. The btreeComputeFreeSpace() call has already
** validated the freelist. Given that the freelist is valid, there
** is no way that the allocation can extend off the end of the page.
** The assert() below verifies the previous sentence.
*/
top -= nByte;
put2byte(&data[hdr+5], top);
assert( top+nByte <= (int)pPage->pBt->usableSize );
*pIdx = top;
return SQLITE_OK;
}
/*
** Return a section of the pPage->aData to the freelist.
** The first byte of the new free block is pPage->aData[iStart]
** and the size of the block is iSize bytes.
**
** Adjacent freeblocks are coalesced.
**
** Even though the freeblock list was checked by btreeComputeFreeSpace(),
** that routine will not detect overlap between cells or freeblocks. Nor
** does it detect cells or freeblocks that encrouch into the reserved bytes
** at the end of the page. So do additional corruption checks inside this
** routine and return SQLITE_CORRUPT if any problems are found.
*/
static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
u16 iPtr; /* Address of ptr to next freeblock */
u16 iFreeBlk; /* Address of the next freeblock */
u8 hdr; /* Page header size. 0 or 100 */
u8 nFrag = 0; /* Reduction in fragmentation */
u16 iOrigSize = iSize; /* Original value of iSize */
u16 x; /* Offset to cell content area */
u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */
unsigned char *data = pPage->aData; /* Page content */
assert( pPage->pBt!=0 );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize );
assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( iSize>=4 ); /* Minimum cell size is 4 */
assert( iStart<=pPage->pBt->usableSize-4 );
/* The list of freeblocks must be in ascending order. Find the
** spot on the list where iStart should be inserted.
*/
hdr = pPage->hdrOffset;
iPtr = hdr + 1;
if( data[iPtr+1]==0 && data[iPtr]==0 ){
iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */
}else{
while( (iFreeBlk = get2byte(&data[iPtr]))<iStart ){
if( iFreeBlk<iPtr+4 ){
if( iFreeBlk==0 ) break;
return SQLITE_CORRUPT_PAGE(pPage);
}
iPtr = iFreeBlk;
}
if( iFreeBlk>pPage->pBt->usableSize-4 ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( iFreeBlk>iPtr || iFreeBlk==0 );
/* At this point:
** iFreeBlk: First freeblock after iStart, or zero if none
** iPtr: The address of a pointer to iFreeBlk
**
** Check to see if iFreeBlk should be coalesced onto the end of iStart.
*/
if( iFreeBlk && iEnd+3>=iFreeBlk ){
nFrag = iFreeBlk - iEnd;
if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_PAGE(pPage);
iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]);
if( iEnd > pPage->pBt->usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
iSize = iEnd - iStart;
iFreeBlk = get2byte(&data[iFreeBlk]);
}
/* If iPtr is another freeblock (that is, if iPtr is not the freelist
** pointer in the page header) then check to see if iStart should be
** coalesced onto the end of iPtr.
*/
if( iPtr>hdr+1 ){
int iPtrEnd = iPtr + get2byte(&data[iPtr+2]);
if( iPtrEnd+3>=iStart ){
if( iPtrEnd>iStart ) return SQLITE_CORRUPT_PAGE(pPage);
nFrag += iStart - iPtrEnd;
iSize = iEnd - iPtr;
iStart = iPtr;
}
}
if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_PAGE(pPage);
data[hdr+7] -= nFrag;
}
x = get2byte(&data[hdr+5]);
if( iStart<=x ){
/* The new freeblock is at the beginning of the cell content area,
** so just extend the cell content area rather than create another
** freelist entry */
if( iStart<x || iPtr!=hdr+1 ) return SQLITE_CORRUPT_PAGE(pPage);
put2byte(&data[hdr+1], iFreeBlk);
put2byte(&data[hdr+5], iEnd);
}else{
/* Insert the new freeblock into the freelist */
put2byte(&data[iPtr], iStart);
}
if( pPage->pBt->btsFlags & BTS_FAST_SECURE ){
/* Overwrite deleted information with zeros when the secure_delete
** option is enabled */
memset(&data[iStart], 0, iSize);
}
put2byte(&data[iStart], iFreeBlk);
put2byte(&data[iStart+2], iSize);
pPage->nFree += iOrigSize;
return SQLITE_OK;
}
/*
** Decode the flags byte (the first byte of the header) for a page
** and initialize fields of the MemPage structure accordingly.
**
** Only the following combinations are supported. Anything different
** indicates a corrupt database files:
**
** PTF_ZERODATA
** PTF_ZERODATA | PTF_LEAF
** PTF_LEAFDATA | PTF_INTKEY
** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
*/
static int decodeFlags(MemPage *pPage, int flagByte){
BtShared *pBt; /* A copy of pPage->pBt */
assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 );
flagByte &= ~PTF_LEAF;
pPage->childPtrSize = 4-4*pPage->leaf;
pPage->xCellSize = cellSizePtr;
pBt = pPage->pBt;
if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
/* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an
** interior table b-tree page. */
assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
/* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a
** leaf table b-tree page. */
assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
pPage->intKey = 1;
if( pPage->leaf ){
pPage->intKeyLeaf = 1;
pPage->xParseCell = btreeParseCellPtr;
}else{
pPage->intKeyLeaf = 0;
pPage->xCellSize = cellSizePtrNoPayload;
pPage->xParseCell = btreeParseCellPtrNoPayload;
}
pPage->maxLocal = pBt->maxLeaf;
pPage->minLocal = pBt->minLeaf;
}else if( flagByte==PTF_ZERODATA ){
/* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an
** interior index b-tree page. */
assert( (PTF_ZERODATA)==2 );
/* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a
** leaf index b-tree page. */
assert( (PTF_ZERODATA|PTF_LEAF)==10 );
pPage->intKey = 0;
pPage->intKeyLeaf = 0;
pPage->xParseCell = btreeParseCellPtrIndex;
pPage->maxLocal = pBt->maxLocal;
pPage->minLocal = pBt->minLocal;
}else{
/* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
** an error. */
return SQLITE_CORRUPT_PAGE(pPage);
}
pPage->max1bytePayload = pBt->max1bytePayload;
return SQLITE_OK;
}
/*
** Compute the amount of freespace on the page. In other words, fill
** in the pPage->nFree field.
*/
static int btreeComputeFreeSpace(MemPage *pPage){
int pc; /* Address of a freeblock within pPage->aData[] */
u8 hdr; /* Offset to beginning of page header */
u8 *data; /* Equal to pPage->aData */
int usableSize; /* Amount of usable space on each page */
int nFree; /* Number of unused bytes on the page */
int top; /* First byte of the cell content area */
int iCellFirst; /* First allowable cell or freeblock offset */
int iCellLast; /* Last possible cell or freeblock offset */
assert( pPage->pBt!=0 );
assert( pPage->pBt->db!=0 );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
assert( pPage->isInit==1 );
assert( pPage->nFree<0 );
usableSize = pPage->pBt->usableSize;
hdr = pPage->hdrOffset;
data = pPage->aData;
/* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
** the start of the cell content area. A zero value for this integer is
** interpreted as 65536. */
top = get2byteNotZero(&data[hdr+5]);
iCellFirst = hdr + 8 + pPage->childPtrSize + 2*pPage->nCell;
iCellLast = usableSize - 4;
/* Compute the total free space on the page
** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
** start of the first freeblock on the page, or is zero if there are no
** freeblocks. */
pc = get2byte(&data[hdr+1]);
nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */
if( pc>0 ){
u32 next, size;
if( pc<iCellFirst ){
/* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
** always be at least one cell before the first freeblock.
*/
return SQLITE_CORRUPT_PAGE(pPage);
}
while( 1 ){
if( pc>iCellLast ){
/* Freeblock off the end of the page */
return SQLITE_CORRUPT_PAGE(pPage);
}
next = get2byte(&data[pc]);
size = get2byte(&data[pc+2]);
nFree = nFree + size;
if( next<=pc+size+3 ) break;
pc = next;
}
if( next>0 ){
/* Freeblock not in ascending order */
return SQLITE_CORRUPT_PAGE(pPage);
}
if( pc+size>(unsigned int)usableSize ){
/* Last freeblock extends past page end */
return SQLITE_CORRUPT_PAGE(pPage);
}
}
/* At this point, nFree contains the sum of the offset to the start
** of the cell-content area plus the number of free bytes within
** the cell-content area. If this is greater than the usable-size
** of the page, then the page must be corrupted. This check also
** serves to verify that the offset to the start of the cell-content
** area, according to the page header, lies within the page.
*/
if( nFree>usableSize || nFree<iCellFirst ){
return SQLITE_CORRUPT_PAGE(pPage);
}
pPage->nFree = (u16)(nFree - iCellFirst);
return SQLITE_OK;
}
/*
** Do additional sanity check after btreeInitPage() if
** PRAGMA cell_size_check=ON
*/
static SQLITE_NOINLINE int btreeCellSizeCheck(MemPage *pPage){
int iCellFirst; /* First allowable cell or freeblock offset */
int iCellLast; /* Last possible cell or freeblock offset */
int i; /* Index into the cell pointer array */
int sz; /* Size of a cell */
int pc; /* Address of a freeblock within pPage->aData[] */
u8 *data; /* Equal to pPage->aData */
int usableSize; /* Maximum usable space on the page */
int cellOffset; /* Start of cell content area */
iCellFirst = pPage->cellOffset + 2*pPage->nCell;
usableSize = pPage->pBt->usableSize;
iCellLast = usableSize - 4;
data = pPage->aData;
cellOffset = pPage->cellOffset;
if( !pPage->leaf ) iCellLast--;
for(i=0; i<pPage->nCell; i++){
pc = get2byteAligned(&data[cellOffset+i*2]);
testcase( pc==iCellFirst );
testcase( pc==iCellLast );
if( pc<iCellFirst || pc>iCellLast ){
return SQLITE_CORRUPT_PAGE(pPage);
}
sz = pPage->xCellSize(pPage, &data[pc]);
testcase( pc+sz==usableSize );
if( pc+sz>usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
}
return SQLITE_OK;
}
/*
** Initialize the auxiliary information for a disk block.
**
** Return SQLITE_OK on success. If we see that the page does
** not contain a well-formed database page, then return
** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed. It only shows that
** we failed to detect any corruption.
*/
static int btreeInitPage(MemPage *pPage){
u8 *data; /* Equal to pPage->aData */
BtShared *pBt; /* The main btree structure */
assert( pPage->pBt!=0 );
assert( pPage->pBt->db!=0 );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
assert( pPage->isInit==0 );
pBt = pPage->pBt;
data = pPage->aData + pPage->hdrOffset;
/* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
** the b-tree page type. */
if( decodeFlags(pPage, data[0]) ){
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
pPage->maskPage = (u16)(pBt->pageSize - 1);
pPage->nOverflow = 0;
pPage->cellOffset = pPage->hdrOffset + 8 + pPage->childPtrSize;
pPage->aCellIdx = data + pPage->childPtrSize + 8;
pPage->aDataEnd = pPage->aData + pBt->usableSize;
pPage->aDataOfst = pPage->aData + pPage->childPtrSize;
/* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
** number of cells on the page. */
pPage->nCell = get2byte(&data[3]);
if( pPage->nCell>MX_CELL(pBt) ){
/* To many cells for a single page. The page must be corrupt */
return SQLITE_CORRUPT_PAGE(pPage);
}
testcase( pPage->nCell==MX_CELL(pBt) );
/* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
** possible for a root page of a table that contains no rows) then the
** offset to the cell content area will equal the page size minus the
** bytes of reserved space. */
assert( pPage->nCell>0
|| get2byteNotZero(&data[5])==(int)pBt->usableSize
|| CORRUPT_DB );
pPage->nFree = -1; /* Indicate that this value is yet uncomputed */
pPage->isInit = 1;
if( pBt->db->flags & SQLITE_CellSizeCk ){
return btreeCellSizeCheck(pPage);
}
return SQLITE_OK;
}
/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(MemPage *pPage, int flags){
unsigned char *data = pPage->aData;
BtShared *pBt = pPage->pBt;
u8 hdr = pPage->hdrOffset;
u16 first;
assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
assert( sqlite3PagerGetData(pPage->pDbPage) == data );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( sqlite3_mutex_held(pBt->mutex) );
if( pBt->btsFlags & BTS_FAST_SECURE ){
memset(&data[hdr], 0, pBt->usableSize - hdr);
}
data[hdr] = (char)flags;
first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8);
memset(&data[hdr+1], 0, 4);
data[hdr+7] = 0;
put2byte(&data[hdr+5], pBt->usableSize);
pPage->nFree = (u16)(pBt->usableSize - first);
decodeFlags(pPage, flags);
pPage->cellOffset = first;
pPage->aDataEnd = &data[pBt->usableSize];
pPage->aCellIdx = &data[first];
pPage->aDataOfst = &data[pPage->childPtrSize];
pPage->nOverflow = 0;
assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
pPage->maskPage = (u16)(pBt->pageSize - 1);
pPage->nCell = 0;
pPage->isInit = 1;
}
/*
** Convert a DbPage obtained from the pager into a MemPage used by
** the btree layer.
*/
static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
if( pgno!=pPage->pgno ){
pPage->aData = sqlite3PagerGetData(pDbPage);
pPage->pDbPage = pDbPage;
pPage->pBt = pBt;
pPage->pgno = pgno;
pPage->hdrOffset = pgno==1 ? 100 : 0;
}
assert( pPage->aData==sqlite3PagerGetData(pDbPage) );
return pPage;
}
/*
** Get a page from the pager. Initialize the MemPage.pBt and
** MemPage.aData elements if needed. See also: btreeGetUnusedPage().
**
** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care
** about the content of the page at this time. So do not go to the disk
** to fetch the content. Just fill in the content with zeros for now.
** If in the future we call sqlite3PagerWrite() on this page, that
** means we have started to be concerned about content and the disk
** read should occur at that point.
*/
static int btreeGetPage(
BtShared *pBt, /* The btree */
Pgno pgno, /* Number of the page to fetch */
MemPage **ppPage, /* Return the page in this parameter */
int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
){
int rc;
DbPage *pDbPage;
assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY );
assert( sqlite3_mutex_held(pBt->mutex) );
rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
if( rc ) return rc;
*ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
return SQLITE_OK;
}
/*
** Retrieve a page from the pager cache. If the requested page is not
** already in the pager cache return NULL. Initialize the MemPage.pBt and
** MemPage.aData elements if needed.
*/
static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){
DbPage *pDbPage;
assert( sqlite3_mutex_held(pBt->mutex) );
pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
if( pDbPage ){
return btreePageFromDbPage(pDbPage, pgno, pBt);
}
return 0;
}
/*
** Return the size of the database file in pages. If there is any kind of
** error, return ((unsigned int)-1).
*/
static Pgno btreePagecount(BtShared *pBt){
return pBt->nPage;
}
u32 sqlite3BtreeLastPage(Btree *p){
assert( sqlite3BtreeHoldsMutex(p) );
assert( ((p->pBt->nPage)&0x80000000)==0 );
return btreePagecount(p->pBt);
}
/*
** Get a page from the pager and initialize it.
**
** If pCur!=0 then the page is being fetched as part of a moveToChild()
** call. Do additional sanity checking on the page in this case.
** And if the fetch fails, this routine must decrement pCur->iPage.
**
** The page is fetched as read-write unless pCur is not NULL and is
** a read-only cursor.
**
** If an error occurs, then *ppPage is undefined. It
** may remain unchanged, or it may be set to an invalid value.
*/
static int getAndInitPage(
BtShared *pBt, /* The database file */
Pgno pgno, /* Number of the page to get */
MemPage **ppPage, /* Write the page pointer here */
BtCursor *pCur, /* Cursor to receive the page, or NULL */
int bReadOnly /* True for a read-only page */
){
int rc;
DbPage *pDbPage;
assert( sqlite3_mutex_held(pBt->mutex) );
assert( pCur==0 || ppPage==&pCur->pPage );
assert( pCur==0 || bReadOnly==pCur->curPagerFlags );
assert( pCur==0 || pCur->iPage>0 );
if( pgno>btreePagecount(pBt) ){
rc = SQLITE_CORRUPT_BKPT;
goto getAndInitPage_error1;
}
rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly);
if( rc ){
goto getAndInitPage_error1;
}
*ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
if( (*ppPage)->isInit==0 ){
btreePageFromDbPage(pDbPage, pgno, pBt);
rc = btreeInitPage(*ppPage);
if( rc!=SQLITE_OK ){
goto getAndInitPage_error2;
}
}
assert( (*ppPage)->pgno==pgno );
assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) );
/* If obtaining a child page for a cursor, we must verify that the page is
** compatible with the root page. */
if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){
rc = SQLITE_CORRUPT_PGNO(pgno);
goto getAndInitPage_error2;
}
return SQLITE_OK;
getAndInitPage_error2:
releasePage(*ppPage);
getAndInitPage_error1:
if( pCur ){
pCur->iPage--;
pCur->pPage = pCur->apPage[pCur->iPage];
}
testcase( pgno==0 );
assert( pgno!=0 || rc==SQLITE_CORRUPT );
return rc;
}
/*
** Release a MemPage. This should be called once for each prior
** call to btreeGetPage.
**
** Page1 is a special case and must be released using releasePageOne().
*/
static void releasePageNotNull(MemPage *pPage){
assert( pPage->aData );
assert( pPage->pBt );
assert( pPage->pDbPage!=0 );
assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
sqlite3PagerUnrefNotNull(pPage->pDbPage);
}
static void releasePage(MemPage *pPage){
if( pPage ) releasePageNotNull(pPage);
}
static void releasePageOne(MemPage *pPage){
assert( pPage!=0 );
assert( pPage->aData );
assert( pPage->pBt );
assert( pPage->pDbPage!=0 );
assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
sqlite3PagerUnrefPageOne(pPage->pDbPage);
}
/*
** Get an unused page.
**
** This works just like btreeGetPage() with the addition:
**
** * If the page is already in use for some other purpose, immediately
** release it and return an SQLITE_CURRUPT error.
** * Make sure the isInit flag is clear
*/
static int btreeGetUnusedPage(
BtShared *pBt, /* The btree */
Pgno pgno, /* Number of the page to fetch */
MemPage **ppPage, /* Return the page in this parameter */
int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
){
int rc = btreeGetPage(pBt, pgno, ppPage, flags);
if( rc==SQLITE_OK ){
if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){
releasePage(*ppPage);
*ppPage = 0;
return SQLITE_CORRUPT_BKPT;
}
(*ppPage)->isInit = 0;
}else{
*ppPage = 0;
}
return rc;
}
/*
** During a rollback, when the pager reloads information into the cache
** so that the cache is restored to its original state at the start of
** the transaction, for each page restored this routine is called.
**
** This routine needs to reset the extra data section at the end of the
** page to agree with the restored data.
*/
static void pageReinit(DbPage *pData){
MemPage *pPage;
pPage = (MemPage *)sqlite3PagerGetExtra(pData);
assert( sqlite3PagerPageRefcount(pData)>0 );
if( pPage->isInit ){
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
pPage->isInit = 0;
if( sqlite3PagerPageRefcount(pData)>1 ){
/* pPage might not be a btree page; it might be an overflow page
** or ptrmap page or a free page. In those cases, the following
** call to btreeInitPage() will likely return SQLITE_CORRUPT.
** But no harm is done by this. And it is very important that
** btreeInitPage() be called on every btree page so we make
** the call for every page that comes in for re-initing. */
btreeInitPage(pPage);
}
}
}
/*
** Invoke the busy handler for a btree.
*/
static int btreeInvokeBusyHandler(void *pArg){
BtShared *pBt = (BtShared*)pArg;
assert( pBt->db );
assert( sqlite3_mutex_held(pBt->db->mutex) );
return sqlite3InvokeBusyHandler(&pBt->db->busyHandler,
sqlite3PagerFile(pBt->pPager));
}
/*
** Open a database file.
**
** zFilename is the name of the database file. If zFilename is NULL
** then an ephemeral database is created. The ephemeral database might
** be exclusively in memory, or it might use a disk-based memory cache.
** Either way, the ephemeral database will be automatically deleted
** when sqlite3BtreeClose() is called.
**
** If zFilename is ":memory:" then an in-memory database is created
** that is automatically destroyed when it is closed.
**
** The "flags" parameter is a bitmask that might contain bits like
** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY.
**
** If the database is already opened in the same database connection
** and we are in shared cache mode, then the open will fail with an
** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared
** objects in the same database connection since doing so will lead
** to problems with locking.
*/
int sqlite3BtreeOpen(
sqlite3_vfs *pVfs, /* VFS to use for this b-tree */
const char *zFilename, /* Name of the file containing the BTree database */
sqlite3 *db, /* Associated database handle */
Btree **ppBtree, /* Pointer to new Btree object written here */
int flags, /* Options */
int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */
){
BtShared *pBt = 0; /* Shared part of btree structure */
Btree *p; /* Handle to return */
sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */
int rc = SQLITE_OK; /* Result code from this function */
u8 nReserve; /* Byte of unused space on each page */
unsigned char zDbHeader[100]; /* Database header content */
/* True if opening an ephemeral, temporary database */
const int isTempDb = zFilename==0 || zFilename[0]==0;
/* Set the variable isMemdb to true for an in-memory database, or
** false for a file-based database.
*/
#ifdef SQLITE_OMIT_MEMORYDB
const int isMemdb = 0;
#else
const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0)
|| (isTempDb && sqlite3TempInMemory(db))
|| (vfsFlags & SQLITE_OPEN_MEMORY)!=0;
#endif
assert( db!=0 );
assert( pVfs!=0 );
assert( sqlite3_mutex_held(db->mutex) );
assert( (flags&0xff)==flags ); /* flags fit in 8 bits */
/* Only a BTREE_SINGLE database can be BTREE_UNORDERED */
assert( (flags & BTREE_UNORDERED)==0 || (flags & BTREE_SINGLE)!=0 );
/* A BTREE_SINGLE database is always a temporary and/or ephemeral */
assert( (flags & BTREE_SINGLE)==0 || isTempDb );
if( isMemdb ){
flags |= BTREE_MEMORY;
}
if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){
vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB;
}
p = sqlite3MallocZero(sizeof(Btree));
if( !p ){
return SQLITE_NOMEM_BKPT;
}
p->inTrans = TRANS_NONE;
p->db = db;
#ifndef SQLITE_OMIT_SHARED_CACHE
p->lock.pBtree = p;
p->lock.iTable = 1;
#endif
#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
/*
** If this Btree is a candidate for shared cache, try to find an
** existing BtShared object that we can share with
*/
if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
int nFilename = sqlite3Strlen30(zFilename)+1;
int nFullPathname = pVfs->mxPathname+1;
char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename));
MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
p->sharable = 1;
if( !zFullPathname ){
sqlite3_free(p);
return SQLITE_NOMEM_BKPT;
}
if( isMemdb ){
memcpy(zFullPathname, zFilename, nFilename);
}else{
rc = sqlite3OsFullPathname(pVfs, zFilename,
nFullPathname, zFullPathname);
if( rc ){
sqlite3_free(zFullPathname);
sqlite3_free(p);
return rc;
}
}
#if SQLITE_THREADSAFE
mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN);
sqlite3_mutex_enter(mutexOpen);
mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
sqlite3_mutex_enter(mutexShared);
#endif
for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
assert( pBt->nRef>0 );
if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0))
&& sqlite3PagerVfs(pBt->pPager)==pVfs ){
int iDb;
for(iDb=db->nDb-1; iDb>=0; iDb--){
Btree *pExisting = db->aDb[iDb].pBt;
if( pExisting && pExisting->pBt==pBt ){
sqlite3_mutex_leave(mutexShared);
sqlite3_mutex_leave(mutexOpen);
sqlite3_free(zFullPathname);
sqlite3_free(p);
return SQLITE_CONSTRAINT;
}
}
p->pBt = pBt;
pBt->nRef++;
break;
}
}
sqlite3_mutex_leave(mutexShared);
sqlite3_free(zFullPathname);
}
#ifdef SQLITE_DEBUG
else{
/* In debug mode, we mark all persistent databases as sharable
** even when they are not. This exercises the locking code and
** gives more opportunity for asserts(sqlite3_mutex_held())
** statements to find locking problems.
*/
p->sharable = 1;
}
#endif
}
#endif
if( pBt==0 ){
/*
** The following asserts make sure that structures used by the btree are
** the right size. This is to guard against size changes that result
** when compiling on a different architecture.
*/
assert( sizeof(i64)==8 );
assert( sizeof(u64)==8 );
assert( sizeof(u32)==4 );
assert( sizeof(u16)==2 );
assert( sizeof(Pgno)==4 );
pBt = sqlite3MallocZero( sizeof(*pBt) );
if( pBt==0 ){
rc = SQLITE_NOMEM_BKPT;
goto btree_open_out;
}
rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
sizeof(MemPage), flags, vfsFlags, pageReinit);
if( rc==SQLITE_OK ){
sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap);
rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
}
if( rc!=SQLITE_OK ){
goto btree_open_out;
}
pBt->openFlags = (u8)flags;
pBt->db = db;
sqlite3PagerSetBusyHandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
p->pBt = pBt;
pBt->pCursor = 0;
pBt->pPage1 = 0;
if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY;
#if defined(SQLITE_SECURE_DELETE)
pBt->btsFlags |= BTS_SECURE_DELETE;
#elif defined(SQLITE_FAST_SECURE_DELETE)
pBt->btsFlags |= BTS_OVERWRITE;
#endif
/* EVIDENCE-OF: R-51873-39618 The page size for a database file is
** determined by the 2-byte integer located at an offset of 16 bytes from
** the beginning of the database file. */
pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16);
if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
|| ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
pBt->pageSize = 0;
#ifndef SQLITE_OMIT_AUTOVACUUM
/* If the magic name ":memory:" will create an in-memory database, then
** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
** regular file-name. In this case the auto-vacuum applies as per normal.
*/
if( zFilename && !isMemdb ){
pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
}
#endif
nReserve = 0;
}else{
/* EVIDENCE-OF: R-37497-42412 The size of the reserved region is
** determined by the one-byte unsigned integer found at an offset of 20
** into the database file header. */
nReserve = zDbHeader[20];
pBt->btsFlags |= BTS_PAGESIZE_FIXED;
#ifndef SQLITE_OMIT_AUTOVACUUM
pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
#endif
}
rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
if( rc ) goto btree_open_out;
pBt->usableSize = pBt->pageSize - nReserve;
assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
/* Add the new BtShared object to the linked list sharable BtShareds.
*/
pBt->nRef = 1;
if( p->sharable ){
MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);)
if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){
pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
if( pBt->mutex==0 ){
rc = SQLITE_NOMEM_BKPT;
goto btree_open_out;
}
}
sqlite3_mutex_enter(mutexShared);
pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList);
GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt;
sqlite3_mutex_leave(mutexShared);
}
#endif
}
#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
/* If the new Btree uses a sharable pBtShared, then link the new
** Btree into the list of all sharable Btrees for the same connection.
** The list is kept in ascending order by pBt address.
*/
if( p->sharable ){
int i;
Btree *pSib;
for(i=0; i<db->nDb; i++){
if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
while( pSib->pPrev ){ pSib = pSib->pPrev; }
if( (uptr)p->pBt<(uptr)pSib->pBt ){
p->pNext = pSib;
p->pPrev = 0;
pSib->pPrev = p;
}else{
while( pSib->pNext && (uptr)pSib->pNext->pBt<(uptr)p->pBt ){
pSib = pSib->pNext;
}
p->pNext = pSib->pNext;
p->pPrev = pSib;
if( p->pNext ){
p->pNext->pPrev = p;
}
pSib->pNext = p;
}
break;
}
}
}
#endif
*ppBtree = p;
btree_open_out:
if( rc!=SQLITE_OK ){
if( pBt && pBt->pPager ){
sqlite3PagerClose(pBt->pPager, 0);
}
sqlite3_free(pBt);
sqlite3_free(p);
*ppBtree = 0;
}else{
sqlite3_file *pFile;
/* If the B-Tree was successfully opened, set the pager-cache size to the
** default value. Except, when opening on an existing shared pager-cache,
** do not change the pager-cache size.
*/
if( sqlite3BtreeSchema(p, 0, 0)==0 ){
sqlite3PagerSetCachesize(p->pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE);
}
pFile = sqlite3PagerFile(pBt->pPager);
if( pFile->pMethods ){
sqlite3OsFileControlHint(pFile, SQLITE_FCNTL_PDB, (void*)&pBt->db);
}
}
if( mutexOpen ){
assert( sqlite3_mutex_held(mutexOpen) );
sqlite3_mutex_leave(mutexOpen);
}
assert( rc!=SQLITE_OK || sqlite3BtreeConnectionCount(*ppBtree)>0 );
return rc;
}
/*
** Decrement the BtShared.nRef counter. When it reaches zero,
** remove the BtShared structure from the sharing list. Return
** true if the BtShared.nRef counter reaches zero and return
** false if it is still positive.
*/
static int removeFromSharingList(BtShared *pBt){
#ifndef SQLITE_OMIT_SHARED_CACHE
MUTEX_LOGIC( sqlite3_mutex *pMaster; )
BtShared *pList;
int removed = 0;
assert( sqlite3_mutex_notheld(pBt->mutex) );
MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
sqlite3_mutex_enter(pMaster);
pBt->nRef--;
if( pBt->nRef<=0 ){
if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){
GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext;
}else{
pList = GLOBAL(BtShared*,sqlite3SharedCacheList);
while( ALWAYS(pList) && pList->pNext!=pBt ){
pList=pList->pNext;
}
if( ALWAYS(pList) ){
pList->pNext = pBt->pNext;
}
}
if( SQLITE_THREADSAFE ){
sqlite3_mutex_free(pBt->mutex);
}
removed = 1;
}
sqlite3_mutex_leave(pMaster);
return removed;
#else
return 1;
#endif
}
/*
** Make sure pBt->pTmpSpace points to an allocation of
** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child
** pointer.
*/
static void allocateTempSpace(BtShared *pBt){
if( !pBt->pTmpSpace ){
pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize );
/* One of the uses of pBt->pTmpSpace is to format cells before
** inserting them into a leaf page (function fillInCell()). If
** a cell is less than 4 bytes in size, it is rounded up to 4 bytes
** by the various routines that manipulate binary cells. Which
** can mean that fillInCell() only initializes the first 2 or 3
** bytes of pTmpSpace, but that the first 4 bytes are copied from
** it into a database page. This is not actually a problem, but it
** does cause a valgrind error when the 1 or 2 bytes of unitialized
** data is passed to system call write(). So to avoid this error,
** zero the first 4 bytes of temp space here.
**
** Also: Provide four bytes of initialized space before the
** beginning of pTmpSpace as an area available to prepend the
** left-child pointer to the beginning of a cell.
*/
if( pBt->pTmpSpace ){
memset(pBt->pTmpSpace, 0, 8);
pBt->pTmpSpace += 4;
}
}
}
/*
** Free the pBt->pTmpSpace allocation
*/
static void freeTempSpace(BtShared *pBt){
if( pBt->pTmpSpace ){
pBt->pTmpSpace -= 4;
sqlite3PageFree(pBt->pTmpSpace);
pBt->pTmpSpace = 0;
}
}
/*
** Close an open database and invalidate all cursors.
*/
int sqlite3BtreeClose(Btree *p){
BtShared *pBt = p->pBt;
BtCursor *pCur;
/* Close all cursors opened via this handle. */
assert( sqlite3_mutex_held(p->db->mutex) );
sqlite3BtreeEnter(p);
pCur = pBt->pCursor;
while( pCur ){
BtCursor *pTmp = pCur;
pCur = pCur->pNext;
if( pTmp->pBtree==p ){
sqlite3BtreeCloseCursor(pTmp);