I have the following script, which transforms several PyTables files by
changing the format of one field in a table from a StringCol to an
Int32Col. I have a couple of problems:
1. The memory usage of the process keeps growing until the process dies,
so I have to restart it several times.
2. Is there any way to do this without converting the data to a Python
list, and to make it faster?
import os
from tables import *
from time import clock, time
from path import path
from mx.DateTime import Parser, Time, DateTime
from attrdict import attrdict
# Compression settings passed to createTable() in getTables():
# the 'blosc' compression library at compression level 2.
filterProps = Filters(complevel=2, complib='blosc')
# Previous on-disk layout, kept for reference only. It differs from the
# current DataDescriptionDaily in a single column: hhmmss was a
# 6-character string instead of a 32-bit integer.
#Old table description:
#class DataDescriptionDaily(IsDescription):
# fullSymbol = StringCol(100, pos=0)
# calcField = StringCol(100, pos=1)
# hhmmss = StringCol(6, pos=2)
# value = Float32Col(pos=3)
class DataDescriptionDaily(IsDescription):
    """Row layout of the '/data' table in its new format.

    Compared with the old description, only ``hhmmss`` changes: it is
    now a 32-bit integer column instead of a 6-character string column.
    The ``pos`` arguments fix the column order.
    """
    # Up to 100 characters.
    fullSymbol = StringCol(100, pos=0)
    # Up to 100 characters.
    calcField = StringCol(100, pos=1)
    # 32-bit integer; presumably a time of day encoded as HHMMSS
    # (inferred from the name only -- TODO confirm).
    hhmmss = Int32Col(pos=2)
    # Single-precision float.
    value = Float32Col(pos=3)
def getTables(mapping):
    """Attach the '/data' table of ``mapping.hdf5`` as ``mapping.tblData``.

    If the file has no '/data' node yet, a new table is created with the
    DataDescriptionDaily layout and the module-level compression filters.
    Afterwards the column indexes are created or refreshed through
    createIndexes().

    mapping -- object exposing an open PyTables file as ``mapping.hdf5``;
               ``mapping.tblData`` is set as a side effect.

    Only a missing node triggers table creation. Any other failure
    (closed file, interrupted run, corrupted node, ...) now propagates
    instead of being swallowed by a bare ``except``.

    NOTE(review): the indentation of the original listing was lost, so it
    is not certain whether createIndexes() ran unconditionally or only
    when the table had just been created. It is called unconditionally
    here -- confirm against the original script.
    """
    try:
        mapping.tblData = mapping.hdf5.getNode('/data')
    except NoSuchNodeError:
        # The table does not exist yet: create it in the new format.
        mapping.tblData = mapping.hdf5.createTable(
            '/', 'data', DataDescriptionDaily,
            expectedrows=75000000, filters=filterProps)
    createIndexes(mapping)
def createIndexes(mapping):
    """Create or refresh the indexes of ``mapping.tblData``, then flush.

    The hhmmss, calcField and fullSymbol columns are handled in that
    order: a column that is already indexed is re-indexed, any other
    column gets a new completely sorted index (CSI). Finally
    ``mapping.hdf5`` is flushed.

    mapping -- object exposing the table as ``mapping.tblData`` and its
               open PyTables file as ``mapping.hdf5``.
    """
    columns = mapping.tblData.cols
    for columnName in ('hhmmss', 'calcField', 'fullSymbol'):
        column = getattr(columns, columnName)
        if not column.is_indexed:
            column.createCSIndex()
        else:
            column.reIndex()
    mapping.hdf5.flush()
dirPathList = [path("/calc")]
for dirPath in dirPathList:
for f in dirPath.files("*.h5"):
oldFileName = path("%s.old" % f)
if oldFileName.exists():
print 'Ignoring file: ', f
continue
mapping = attrdict()
mapping.filePath = f
mapping.hdf5 = openFile(mapping.filePath, "a")
getTables(mapping)
mapping2 = attrdict()
mapping2.filePath = f+"_new"
mapping2.hdf5 = openFile(mapping2.filePath, "w")
getTables(mapping2)
#save symbol mapping
rowsToBeInserted = []
for row in mapping.tblData.iterrows():
row[2] = int(row[2])
rowsToBeInserted.append((row[0], row[1], row[2], row[3]))
if rowsToBeInserted:
mapping2.tblData.append(rowsToBeInserted)
mapping2.tblData.flush()
#create index and flush file
createIndexes(mapping2)
mapping2.hdf5.flush()
#get rid of old file
lines = os.popen("mv %s %s.old" % (mapping.filePath,
mapping.filePath)).readlines()
if lines: print lines
lines = os.popen("mv %s %s" % (mapping2.filePath,
mapping.filePath)).readlines()
if lines: print lines
------------------------------------------------------------------------------
This SF.net email is sponsored by
Make an app they can't live without
Enter the BlackBerry Developer Challenge
http://p.sf.net/sfu/RIM-dev2dev
_______________________________________________
Pytables-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/pytables-users