On Thu, Dec 18, 2008 at 3:58 PM, klia <alwaseem307s...@yahoo.com> wrote: > hey guys, i have a hug .csv file which i need to insert it into sqlite > database using python. > my csv data looks like this > Birthday2,12/5/2008,HTC,this is my birthday > Sea,12/3/2008,kodak,sea > birthday4,14/3/2009,samsung,birthday > love,17/4/2009,SONY,view of island > > can any one give me a head start codes.
How big ? When size is important in data processing, you should _never_ try to load it all up at once. Use filters... Here's a head start: $ csv2sql.py mydata.csv | sqlite3 mydatabase.db Here's the source to my csv2sql.py tool (1): ------------------------------------------------------------ #!/usr/bin/env python # Module: csv2sql # Date: 14th September 2008 # Author: James Mills, prologic at shortcircuit dot net dot au """csv2sql Tool to convert CSV data files into SQL statements that can be used to create SQL tables. Each line of text in the file is read, parsed and converted to SQL and output to stdout (which can be piped). """ __desc__ = "CSV to SQL Tool" __version__ = "0.2" __author__ = "James Mills" __email__ = "%s, prologic at shortcircuit dot net dot au" % __author__ __url__ = "http://shortcircuit.net.au/~prologic/" __copyright__ = "CopyRight (C) 2008 by %s" % __author__ __license__ = "GPL" import os import csv import optparse from cStringIO import StringIO USAGE = "%prog [options] <file>" VERSION = "%prog v" + __version__ def parse_options(): """parse_options() -> opts, args Parse any command-line options given returning both the parsed options and arguments. """ parser = optparse.OptionParser(usage=USAGE, version=VERSION) parser.add_option("-t", "--table", action="store", default=None, dest="table", help="Specify table name") parser.add_option("-f", "--fields", action="store", default=None, dest="fields", help="Specify a list of fields") opts, args = parser.parse_args() if len(args) < 1: parser.print_help() raise SystemExit, 1 return opts, args def mkBuffer(fd): buffer = StringIO() buffer.write(fd.read()) buffer.seek(0) fd.close() return buffer def readCSV(file): if type(file) == str: fd = open(file, "rU") else: fd = file fd = mkBuffer(fd) sniffer = csv.Sniffer() dialect = sniffer.sniff(fd.readline()) fd.seek(0) reader = csv.reader(fd, dialect) for line in reader: yield line def main(): opts, args = parse_options() file = args[0] if file == "-": fd = sys.stdin if opts.table is None: print "ERROR: No table specified and stdin used." raise SystemExit(1) else: fd = open(file, "rU") if opts.table is None: table = os.path.splitext(file)[0] else: table = opts.table for line in readCSV(fd): if opts.fields: fields = [x.strip() for x in opts.fields.split(",")] fields = "(%s)" % ",".join(fields) else: fields = "" values = ",".join(["\"%s\"" % x for x in line]) print "INSERT INTO %s %s VALUES (%s);" % (table, fields, values) if __name__ == "__main__": main() ---------------------------------------------------------------- Hope this helps, Here's a small test of using it: jmi...@atomant:~$ python Python 2.5.2 (r252:60911, Oct 5 2008, 19:24:49) [GCC 4.3.2] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> fd = open("test.csv", "w") >>> for i in xrange(1000000): ... fd.write("foo,bar,%d\n" % i) ... >>> fd.close() >>> jmi...@atomant:~$ du -h test.csv 15M test.csv jmi...@atomant:~$ wc -l test.csv 1000000 test.csv jmi...@atomant:~$ time csv2sql.py test.csv > test.sql real 0m14.303s user 0m8.877s sys 0m0.676s cheers James References: 1. http://hg.softcircuit.com.au/projects/tools/ -- http://mail.python.org/mailman/listinfo/python-list