blob: b048ec67ffd83a0dac1551dc2b56c99fcd318bd1 [file] [log] [blame] [edit]
/*
** 2009 March 26
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code for implementations of the CSV
** algorithms packaged as an SQLite virtual table module.
*/
#if defined(_WIN32) || defined(WIN32)
/* This needs to come before any includes for MSVC compiler */
#define _CRT_SECURE_NO_WARNINGS
#endif
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_CSV)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#else
#include "sqlite3.h"
#endif
#define UNUSED_PARAMETER(x) (void)(x)
/*
** The CSV virtual-table types.
*/
typedef struct CSV CSV;
typedef struct CSVCursor CSVCursor;
/*
** An CSV virtual-table object.
*/
struct CSV {
sqlite3_vtab base; /* Must be first */
sqlite3 *db; /* Host database connection */
char *zDb; /* Name of database containing CSV table */
char *zName; /* Name of CSV table */
char *zFile; /* Name of CSV file */
int nBusy; /* Current number of users of this structure */
FILE *f; /* File pointer for source CSV file */
long offsetFirstRow; /* ftell position of first row */
int eof; /* True when at end of file */
int maxRow; /* Size of zRow buffer */
char *zRow; /* Buffer for current CSV row */
char cDelim; /* Character to use for delimiting columns */
int nCol; /* Number of columns in current row */
int maxCol; /* Size of aCols array */
char **aCols; /* Array of parsed columns */
};
/*
** An CSV cursor object.
*/
struct CSVCursor {
sqlite3_vtab_cursor base; /* Must be first */
long csvpos; /* ftell position of current zRow */
};
/*
** Forward declarations.
*/
static int csvNext( sqlite3_vtab_cursor* pVtabCursor );
static int csvInit(
sqlite3 *db, /* Database connection */
void *pAux, /* Unused */
int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */
sqlite3_vtab **ppVtab, /* OUT: New virtual table */
char **pzErr, /* OUT: Error message, if any */
int isCreate /* True for xCreate, false for xConnect */
);
static void csvReference( CSV *pCSV );
static int csvRelease( CSV *pCSV );
/*
** Abstract out file io routines for porting
*/
static FILE *csv_open( CSV *pCSV ){
return fopen( pCSV->zFile, "rb" );
}
static void csv_close( CSV *pCSV ){
if( pCSV->f ) fclose( pCSV->f );
}
static int csv_seek( CSV *pCSV, long pos ){
return fseek( pCSV->f, pos, SEEK_SET );
}
static long csv_tell( CSV *pCSV ){
return ftell( pCSV->f );
}
/*
** This routine reads a line of text from FILE in, stores
** the text in memory obtained from malloc() and returns a pointer
** to the text. NULL is returned at end of file, or if malloc()
** fails.
**
** The interface is like "readline" but no command-line editing
** is done.
**
** This code was modified from existing code in shell.c of the sqlite3 CLI.
*/
static char *csv_getline( CSV *pCSV ){
int n = 0;
int bEol = 0;
int bShrink = 0;
/* allocate initial row buffer */
if( pCSV->maxRow < 1 ){
pCSV->zRow = sqlite3_malloc( 100 );
if( pCSV->zRow ){
pCSV->maxRow = 100;
}
}
if( !pCSV->zRow ) return 0;
/* read until eol */
while( !bEol ){
/* grow row buffer as needed */
if( n+100>pCSV->maxRow ){
int newSize = pCSV->maxRow*2 + 100;
char *p = sqlite3_realloc(pCSV->zRow, newSize);
if( !p ) return 0;
pCSV->maxRow = newSize;
pCSV->zRow = p;
bShrink = -1;
}
if( fgets(&pCSV->zRow[n], pCSV->maxRow-n, pCSV->f)==0 ){
if( n==0 ){
break;
}
pCSV->zRow[n] = '\0';
bEol = -1;
break;
}
/* look for line delimiter */
while( pCSV->zRow[n] ){ n++; }
if( (n>0) && ((pCSV->zRow[n-1]=='\n') || (pCSV->zRow[n-1]=='\r')) ){
pCSV->zRow[n-1] = '\n'; /* uniform line ending */
pCSV->zRow[n] = '\0';
bEol = -1;
}
}
if( bShrink ){
pCSV->zRow = realloc( pCSV->zRow, n+1 );
pCSV->maxRow = n+1;
}
return bEol ? pCSV->zRow : 0;
}
/*
** CSV virtual table module xCreate method.
*/
static int csvCreate(
sqlite3* db,
void *pAux,
int argc,
const char *const *argv,
sqlite3_vtab **ppVtab,
char **pzErr
){
return csvInit( db, pAux, argc, argv, ppVtab, pzErr, 1 );
}
/*
** CSV virtual table module xConnect method.
*/
static int csvConnect(
sqlite3 *db,
void *pAux,
int argc, const char *const*argv,
sqlite3_vtab **ppVtab,
char **pzErr
){
return csvInit(db, pAux, argc, argv, ppVtab, pzErr, 0);
}
/*
** CSV virtual table module xBestIndex method.
*/
static int csvBestIndex( sqlite3_vtab *pVtab, sqlite3_index_info* info )
{
UNUSED_PARAMETER(pVtab);
UNUSED_PARAMETER(info);
/* TBD */
return SQLITE_OK;
}
/*
** CSV virtual table module xDisconnect method.
*/
static int csvDisconnect( sqlite3_vtab *pVtab ){
return csvRelease( (CSV *)pVtab );
}
/*
** CSV virtual table module xDestroy method.
*/
static int csvDestroy( sqlite3_vtab *pVtab ){
return csvDisconnect( pVtab );
}
/*
** CSV virtual table module xOpen method.
*/
static int csvOpen( sqlite3_vtab *pVtab, sqlite3_vtab_cursor **ppVtabCursor ){
int rc = SQLITE_NOMEM;
CSVCursor *pCsr;
/* create a new cursor object */
pCsr = (CSVCursor *)sqlite3_malloc(sizeof(CSVCursor));
if( pCsr ){
memset(pCsr, 0, sizeof(CSVCursor));
pCsr->base.pVtab = pVtab;
rc = SQLITE_OK;
}
*ppVtabCursor = (sqlite3_vtab_cursor *)pCsr;
return rc;
}
/*
** CSV virtual table module xClose method.
*/
static int csvClose( sqlite3_vtab_cursor *pVtabCursor ){
CSVCursor *pCsr = (CSVCursor *)pVtabCursor;
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** CSV virtual table module xFilter method.
*/
static int csvFilter(
sqlite3_vtab_cursor *pVtabCursor,
int idxNum, const char *idxStr,
int argc, sqlite3_value **argv
){
CSV *pCSV = (CSV *)pVtabCursor->pVtab;
int rc;
UNUSED_PARAMETER(idxNum);
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(argc);
UNUSED_PARAMETER(argv);
csvReference( pCSV );
/* seek back to start of first zRow */
pCSV->eof = 0;
csv_seek( pCSV, pCSV->offsetFirstRow );
/* read and parse next line */
rc = csvNext( pVtabCursor );
csvRelease( pCSV );
return rc;
}
/*
** CSV virtual table module xNext method.
*/
static int csvNext( sqlite3_vtab_cursor* pVtabCursor ){
CSV *pCSV = (CSV *)pVtabCursor->pVtab;
CSVCursor *pCsr = (CSVCursor *)pVtabCursor;
int nCol = 0;
char *s;
char zDelims[3] = ",\n";
char cDelim; /* char that delimited current col */
if( pCSV->eof ){
return SQLITE_ERROR;
}
/* update the cursor */
pCsr->csvpos = csv_tell( pCSV );
/* read the next row of data */
s = csv_getline( pCSV );
if( !s ){
/* and error or eof occured */
pCSV->eof = -1;
return SQLITE_OK;
}
/* allocate initial space for the column pointers */
if( pCSV->maxCol < 1 ){
/* take a guess */
int maxCol = (int)(strlen(pCSV->zRow) / 5 + 1);
pCSV->aCols = (char **)sqlite3_malloc( sizeof(char*) * maxCol );
if( pCSV->aCols ){
pCSV->maxCol = maxCol;
}
}
if( !pCSV->aCols ) return SQLITE_NOMEM;
/* add custom delim character */
zDelims[0] = pCSV->cDelim;
/* parse the zRow into individual columns */
do{
/* if it begins with a quote, assume it's a quoted col */
if( *s=='\"' ){
s++; /* skip quote */
pCSV->aCols[nCol] = s; /* save pointer for this col */
/* TBD: handle escaped quotes "" */
/* find closing quote */
s = strchr(s, '\"');
if( !s ){
/* no closing quote */
pCSV->eof = -1;
return SQLITE_ERROR;
}
*s = '\0'; /* null terminate this col */
/* fall through and look for following ",\n" */
s++;
}else{
pCSV->aCols[nCol] = s; /* save pointer for this col */
}
s = strpbrk(s, zDelims);
if( !s ){
/* no col delimiter */
pCSV->eof = -1;
return SQLITE_ERROR;
}
cDelim = *s;
/* null terminate the column by overwriting the delimiter */
*s = '\0';
nCol++;
/* if end of zRow, stop parsing cols */
if( cDelim == '\n' ) break;
/* move to start of next col */
s++; /* skip delimiter */
if(nCol >= pCSV->maxCol ){
/* we need to grow our col pointer array */
char **p = (char **)sqlite3_realloc( pCSV->aCols, sizeof(char*) * (nCol+5) );
if( !p ){
/* out of memory */
return SQLITE_ERROR;
}
pCSV->maxCol = nCol + 5;
pCSV->aCols = p;
}
}while( *s );
pCSV->nCol = nCol;
return SQLITE_OK;
}
/*
** CSV virtual table module xEof method.
**
** Return non-zero if the cursor does not currently point to a valid
** record (i.e if the scan has finished), or zero otherwise.
*/
static int csvEof( sqlite3_vtab_cursor *pVtabCursor )
{
CSV *pCSV = (CSV *)pVtabCursor->pVtab;
return pCSV->eof;
}
/*
** CSV virtual table module xColumn method.
*/
static int csvColumn(sqlite3_vtab_cursor *pVtabCursor, sqlite3_context *ctx, int i){
CSV *pCSV = (CSV *)pVtabCursor->pVtab;
if( i<0 || i>=pCSV->nCol ){
sqlite3_result_null( ctx );
}else{
char *col = pCSV->aCols[i];
if( !col ){
sqlite3_result_null( ctx );
}else{
sqlite3_result_text( ctx, col, -1, SQLITE_TRANSIENT );
}
}
return SQLITE_OK;
}
/*
** CSV virtual table module xRowid method.
** We probably should store a hidden table
** mapping rowid's to csvpos.
*/
static int csvRowid( sqlite3_vtab_cursor* pVtabCursor, sqlite3_int64 *pRowid ){
CSVCursor *pCsr = (CSVCursor *)pVtabCursor;
*pRowid = pCsr->csvpos;
return SQLITE_OK;
}
static sqlite3_module csvModule = {
0, /* iVersion */
csvCreate, /* xCreate - create a table */
csvConnect, /* xConnect - connect to an existing table */
csvBestIndex, /* xBestIndex - Determine search strategy */
csvDisconnect, /* xDisconnect - Disconnect from a table */
csvDestroy, /* xDestroy - Drop a table */
csvOpen, /* xOpen - open a cursor */
csvClose, /* xClose - close a cursor */
csvFilter, /* xFilter - configure scan constraints */
csvNext, /* xNext - advance a cursor */
csvEof, /* xEof */
csvColumn, /* xColumn - read data */
csvRowid, /* xRowid - read data */
0, /* xUpdate - write data */
0, /* xBegin - begin transaction */
0, /* xSync - sync transaction */
0, /* xCommit - commit transaction */
0, /* xRollback - rollback transaction */
0, /* xFindFunction - function overloading */
0 /* xRename - rename the table */
};
/*
** Increment the CSV reference count.
*/
static void csvReference( CSV *pCSV ){
pCSV->nBusy++;
}
/*
** Decrement the CSV reference count. When the reference count reaches
** zero the structure is deleted.
*/
static int csvRelease( CSV *pCSV ){
pCSV->nBusy--;
if( pCSV->nBusy<1 ){
/* finalize any prepared statements here */
csv_close( pCSV );
if( pCSV->zRow ) sqlite3_free( pCSV->zRow );
if( pCSV->aCols ) sqlite3_free( pCSV->aCols );
sqlite3_free( pCSV );
}
return 0;
}
/*
** This function is the implementation of both the xConnect and xCreate
** methods of the CSV virtual table.
**
** argv[0] -> module name
** argv[1] -> database name
** argv[2] -> table name
** argv[3] -> csv file name
** argv[4] -> custom delimiter
** argv[5] -> optional: use header row for column names
*/
static int csvInit(
sqlite3 *db, /* Database connection */
void *pAux, /* Unused */
int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */
sqlite3_vtab **ppVtab, /* OUT: New virtual table */
char **pzErr, /* OUT: Error message, if any */
int isCreate /* True for xCreate, false for xConnect */
){
int rc = SQLITE_OK;
int i;
CSV *pCSV;
char *zSql;
char cDelim = ','; /* Default col delimiter */
int bUseHeaderRow = 0; /* Default to not use zRow headers */
size_t nDb; /* Length of string argv[1] */
size_t nName; /* Length of string argv[2] */
size_t nFile; /* Length of string argv[3] */
CSVCursor csvCsr; /* Used for calling csvNext */
const char *aErrMsg[] = {
0, /* 0 */
"No CSV file specified", /* 1 */
"Error opening CSV file: '%s'", /* 2 */
"No columns found", /* 3 */
"No column name found", /* 4 */
"Out of memory", /* 5 */
};
UNUSED_PARAMETER(pAux);
UNUSED_PARAMETER(isCreate);
if( argc < 4 ){
*pzErr = sqlite3_mprintf("%s", aErrMsg[1]);
return SQLITE_ERROR;
}
/* allocate space for the virtual table object */
nDb = strlen(argv[1]);
nName = strlen(argv[2]);
nFile = strlen(argv[3]);
pCSV = (CSV *)sqlite3_malloc( (int)(sizeof(CSV)+nDb+nName+nFile+3) );
if( !pCSV ){
/* out of memory */
*pzErr = sqlite3_mprintf("%s", aErrMsg[5]);
return SQLITE_NOMEM;
}
/* intialize virtual table object */
memset(pCSV, 0, sizeof(CSV)+nDb+nName+nFile+3);
pCSV->nBusy = 1;
pCSV->base.pModule = &csvModule;
pCSV->cDelim = cDelim;
pCSV->zDb = (char *)&pCSV[1];
pCSV->zName = &pCSV->zDb[nDb+1];
pCSV->zFile = &pCSV->zName[nName+1];
memcpy(pCSV->zDb, argv[1], nDb);
memcpy(pCSV->zName, argv[2], nName);
/* pull out name of csv file (remove quotes) */
if( argv[3][0] == '\'' ){
memcpy( pCSV->zFile, argv[3]+1, nFile-2 );
pCSV->zFile[nFile-2] = '\0';
}else{
memcpy( pCSV->zFile, argv[3], nFile );
}
/* if a custom delimiter specified, pull it out */
if( argc > 4 ){
if( argv[4][0] == '\'' ){
pCSV->cDelim = argv[4][1];
}else{
pCSV->cDelim = argv[4][0];
}
}
/* should the header zRow be used */
if( argc > 5 ){
if( !strcmp(argv[5], "USE_HEADER_ROW") ){
bUseHeaderRow = -1;
}
}
/* open the source csv file */
pCSV->f = csv_open( pCSV );
if( !pCSV->f ){
*pzErr = sqlite3_mprintf(aErrMsg[2], pCSV->zFile);
csvRelease( pCSV );
return SQLITE_ERROR;
}
/* Read first zRow to obtain column names/number */
csvCsr.base.pVtab = (sqlite3_vtab *)pCSV;
rc = csvNext( (sqlite3_vtab_cursor *)&csvCsr );
if( (SQLITE_OK!=rc) || (pCSV->nCol<=0) ){
*pzErr = sqlite3_mprintf("%s", aErrMsg[3]);
csvRelease( pCSV );
return SQLITE_ERROR;
}
if( bUseHeaderRow ){
pCSV->offsetFirstRow = csv_tell( pCSV );
}
/* Create the underlying relational database schema. If
** that is successful, call sqlite3_declare_vtab() to configure
** the csv table schema.
*/
zSql = sqlite3_mprintf("CREATE TABLE x(");
for(i=0; zSql && i<pCSV->nCol; i++){
const char *zTail = (i+1<pCSV->nCol) ? ", " : ");";
char *zTmp = zSql;
if( bUseHeaderRow ){
const char *zCol = pCSV->aCols[i];
if( !zCol ){
*pzErr = sqlite3_mprintf("%s", aErrMsg[4]);
sqlite3_free(zSql);
csvRelease( pCSV );
return SQLITE_ERROR;
}
zSql = sqlite3_mprintf("%s%s%s", zTmp, zCol, zTail);
}else{
zSql = sqlite3_mprintf("%scol%d%s", zTmp, i+1, zTail);
}
sqlite3_free(zTmp);
}
if( !zSql ){
*pzErr = sqlite3_mprintf("%s", aErrMsg[5]);
csvRelease( pCSV );
return SQLITE_NOMEM;
}
rc = sqlite3_declare_vtab( db, zSql );
sqlite3_free(zSql);
if( SQLITE_OK != rc ){
*pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db));
csvRelease( pCSV );
return SQLITE_ERROR;
}
*ppVtab = (sqlite3_vtab *)pCSV;
*pzErr = NULL;
return SQLITE_OK;
}
/*
** Register the CSV module with database handle db. This creates the
** virtual table module "csv".
*/
int sqlite3CsvInit(sqlite3 *db){
int rc = SQLITE_OK;
if( rc==SQLITE_OK ){
void *c = (void *)NULL;
rc = sqlite3_create_module_v2(db, "csv", &csvModule, c, 0);
}
return rc;
}
#if !SQLITE_CORE
/*
** Support auto-extension loading.
*/
int sqlite3_extension_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi)
return sqlite3CsvInit(db);
}
#endif
#endif