blob: 59b9a154f41189937fd1342d94e51bffd351db36 [file] [log] [blame]
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Helper methods for dealing with a SQLite database with pandas.
"""
from core.external_modules import pandas
def DataFrame(column_types, index=None, rows=None):
"""Create a DataFrame with given column types as index.
Unlike usual pandas DataFrame constructors, this allows to have explicitly
typed column values, even when no rows of data are provided. And, when such
data is available, values are explicitly casted, instead of letting pandas
guess a type.
Args:
column_types: A sequence of (name, dtype) pairs to define the columns.
index: An optional column name or sequence of column names to use as index
of the frame.
rows: An optional sequence of rows of data.
"""
if rows:
cols = zip(*rows)
assert len(cols) == len(column_types)
cols = (list(vs) for vs in cols)
else:
cols = (None for _ in column_types)
df = pandas.DataFrame()
for (column, dtype), values in zip(column_types, cols):
df[column] = pandas.Series(values, dtype=dtype)
if index is not None:
index = [index] if isinstance(index, basestring) else list(index)
df.set_index(index, inplace=True)
return df
def CreateTableIfNotExists(con, name, frame):
"""Create a new empty table, if it doesn't already exist.
Args:
con: A sqlite connection object.
name: Name of SQL table to create.
frame: A DataFrame used to infer the schema of the table; the index of the
DataFrame is set as PRIMARY KEY of the table.
"""
keys = [k for k in frame.index.names if k is not None]
if not keys:
keys = None
db = pandas.io.sql.SQLiteDatabase(con)
table = pandas.io.sql.SQLiteTable(
name, db, frame=frame, index=keys is not None, keys=keys,
if_exists='append')
table.create()
def _InsertOrReplaceStatement(name, keys):
columns = ','.join(keys)
values = ','.join('?' for _ in keys)
return 'INSERT OR REPLACE INTO %s(%s) VALUES (%s)' % (name, columns, values)
def InsertOrReplaceRecords(con, name, frame):
"""Insert or replace records from a DataFrame into a SQLite database.
Assumes that the table already exists. Any new records with a matching
PRIMARY KEY, usually the frame.index, will replace existing records.
Args:
con: A sqlite connection object.
name: Name of SQL table.
frame: DataFrame with records to write.
"""
db = pandas.io.sql.SQLiteDatabase(con)
table = pandas.io.sql.SQLiteTable(
name, db, frame=frame, index=True, if_exists='append')
assert table.exists()
keys, data = table.insert_data()
insert_statement = _InsertOrReplaceStatement(name, keys)
with db.run_transaction() as c:
c.executemany(insert_statement, zip(*data))