C. Mundi wrote:
> Hi. I have scanned the list archives and Googled. I may have missed
> something, but what I found was not sufficiently general or robust for my
> needs.
>
> Happily, sqlite has a very nice .output mode for CSV. It correctly
> double-quotes fields when they contain a space or a comma, and it correctly
> 'escapes' double-quote literals as "". Great! I need to go the other way.
>
> I don't see an inverse .import mode in sqlite to handle the completely
> general CSV syntax. As anyone who has ever tried knows, parsing compliant
> CSV is a lot harder than writing compliant CSV.
>
> I'm hoping someone will (please) tell me I missed something in the sqlite
> docs. Otherwise, I guess I'll be using python's csv module to turn my CSV
> file into SQL insert statements. This is likely to be an infequent task,
> but it has to be done perfectly. So if someone knows of a command-line tool
> to turn CSV into SQL inserts, I would appreciate that too.
Here's one I just wrote.
#!/usr/bin/env python
"""Utility to import CSV files into an SQLite database."""
from __future__ import division
import csv
import getopt
import os
import sqlite3
import sys
USAGE = 'USAGE: csv2sqlite [-o dbfile] csvfile...'
DEFAULT_OUTPUT_FILE = 'a.db'
_INTEGER_MAX = (1 << 63) - 1
_INTEGER_MIN = -_INTEGER_MAX - 1
def identify_type(value):
"""
Identify the SQLite type for a value.
"""
try:
int_value = int(value)
if int_value < _INTEGER_MIN or int_value > _INTEGER_MAX:
return 'REAL'
return 'INTEGER'
except ValueError:
try:
float(value)
return 'REAL'
except ValueError:
return 'TEXT'
def common_type(values):
"""
Return the common type for a list of values.
"""
types = set(identify_type(value) for value in values if value)
if len(types) == 1:
return types.pop()
elif types == set(['INTEGER', 'REAL']):
return 'NUMERIC'
else:
return 'TEXT'
def add_table_from_file(dbconn, filename):
"""
Read a CSV file and add it as a database table.
dbconn -- open database connection
filename -- path to the CSV file
"""
table_name = os.path.splitext(os.path.basename(filename))[0]
data = list(csv.reader(open(filename)))
if not data:
return
headers = data.pop(0)
num_columns = len(headers)
# Replace all empty strings with NULL
data = [[value or None for value in row] for row in data]
# Ensure that all rows have the same number of columns
for row in data:
for dummy in xrange(num_columns - len(row)):
row.append(None)
# Determine the appropriate type for each column
column_info = [(column_name, common_type(row[col] for row in data))
for (col, column_name) in enumerate(headers)]
# Build the SQL statements
sql_create_table = 'CREATE TABLE IF NOT EXISTS "%s" (%s)' % \
(table_name, ', '.join('%s %s' % column for column in column_info))
sql_insert = 'INSERT INTO "%s" VALUES (%s)' % \
(table_name, ', '.join(['?'] * num_columns))
# Build the database table
dbconn.execute(sql_create_table)
for row in data:
dbconn.execute(sql_insert, row)
def _main(argv=None):
"""
Executed when this module is run as a script.
"""
if argv is None:
argv = sys.argv
options, args = getopt.getopt(argv[1:], 'o:', ['help'])
options = dict(options)
if '--help' in options:
print USAGE
else:
dbconn = sqlite3.connect(options.get('-o', DEFAULT_OUTPUT_FILE))
for filename in args:
add_table_from_file(dbconn, filename)
dbconn.commit()
dbconn.close()
if __name__ == '__main__':
_main()
_______________________________________________
sqlite-users mailing list
[email protected]
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users