I already read
http://groups.google.com/group/django-users/browse_thread/thread/57e862310c2bf664/4e187a95a5651934?q=pylucene&rnum=1#4e187a95a5651934
However, I need to use PyLucene inside a Django app I'm working on.
Right now everything is working fine. I have this:
import os
from PyLucene import IndexWriter, IndexReader, StandardAnalyzer,
Document, Field, \
Hits, IndexSearcher, FSDirectory, RAMDirectory,
PrefixQuery, QueryParser, Term, TermQuery
from django.conf import settings
from elementtree.ElementTree import Element, SubElement, dump
# Copyright Soluciones Vulcano Ltda www.solucionesvulcano.com
# searchEngine: Motor de busqueda basado en pyLucene
""" Motor de busqueda """
class SearchEngine:
    """Search engine built on top of a PyLucene index stored on disk.

    Attributes:
        storeDir: Path of the directory that holds the index files.
        store: The FSDirectory opened on ``storeDir``.
        analyzer: StandardAnalyzer used both for indexing and for parsing
            query strings.
        estaCreado: True once the index exists on disk.
        Index: The IndexSearcher opened by the most recent search call, or
            None if no search has run (or after ``close()``).
        writer: Long-lived IndexWriter slot released by ``close()``; this
            class itself only uses short-lived writers and leaves it None.
    """

    # Default on-disk location of the index.
    DEFAULT_STORE_DIR = 'E:\\Proyectos\\Python\\jhonWeb\\data\\files\\searchIndex\\'

    def __init__(self, storeDir=None):
        """Open the index directory, creating an empty index if needed.

        Args:
            storeDir: Directory holding the index. Defaults to
                ``DEFAULT_STORE_DIR`` so existing ``SearchEngine()`` callers
                keep their behavior.
        """
        if storeDir is None:
            storeDir = self.DEFAULT_STORE_DIR
        self.storeDir = storeDir
        self.Index = None
        self.writer = None
        self.analyzer = StandardAnalyzer()
        # The presence of a 'segments' file is used as the marker that an
        # index has already been created in this directory.
        self.estaCreado = os.path.exists(
            os.path.join(self.storeDir, 'segments'))
        self.store = FSDirectory.getDirectory(
            self.storeDir, not self.estaCreado)
        if not self.estaCreado:
            # Create the (empty) index files. A local writer is used so that
            # self.writer never points at an already-closed writer, which
            # close() would otherwise close a second time.
            writer = IndexWriter(self.store, self.analyzer, True)
            writer.close()
            self.estaCreado = True

    def deleteIndex(self):
        """Delete every file found in the index directory."""
        print('Eliminando...')
        import glob
        # glob already returns paths that include storeDir, so they are
        # removed as-is (joining them with self.store, a Directory object,
        # was an error).
        for archivo in glob.glob(os.path.join(self.storeDir, '**')):
            if not os.path.isfile(archivo):
                # os.remove() cannot delete directories; skip them.
                continue
            print('Borrando %s' % archivo)
            os.remove(archivo)
        print('Finalizado.')

    def close(self):
        """Close the writer and the searcher if they are open.

        Safe to call more than once: each handle is reset to None after it
        has been closed.
        """
        if self.writer is not None:
            self.writer.close()
            self.writer = None
        if self.Index is not None:
            self.Index.close()
            self.Index = None

    def add(self, doc):
        """Add ``doc`` to the index, replacing documents with the same id.

        Args:
            doc: Document to index; its "id" field identifies the entries
                to replace.
        """
        doc_id = doc.get("id")
        # Remove any previous version of the document first.
        reader = self.getIndexReader()
        try:
            reader.deleteDocuments(Term("id", doc_id))
        finally:
            # Always release the reader, even if the delete fails.
            reader.close()
        # Then append the new version.
        writer = self.getIndexWriter()
        try:
            writer.addDocument(doc)
        finally:
            # Always release the writer, even if the add fails.
            writer.close()

    def getIndexReader(self):
        """Return a new IndexReader opened on the index directory."""
        return IndexReader.open(self.store)

    def getIndexWriter(self):
        """Return a new IndexWriter that appends to the existing index."""
        return IndexWriter(self.store, self.analyzer, False)

    def _run_query(self, query):
        """Open a fresh searcher, keep it in ``self.Index``, run ``query``.

        NOTE(review): the searcher previously stored in self.Index is not
        closed here, because hits returned by an earlier search may still
        need it (presumably Hits loads documents lazily -- confirm). Call
        close() once the results are no longer needed.
        """
        self.Index = IndexSearcher(self.store)
        return self.Index.search(query)

    def searchPrefix(self, field, prefix):
        """Return the hits whose ``field`` has a term starting with ``prefix``."""
        return self._run_query(PrefixQuery(Term(field, prefix)))

    def searchTerm(self, field, searchFor):
        """Return the hits whose ``field`` contains the exact term ``searchFor``."""
        return self._run_query(TermQuery(Term(field, searchFor)))

    def hitCount(self, field, searchFor):
        """Return how many documents match the exact term ``searchFor``."""
        return self.searchTerm(field, searchFor).length()

    def search(self, queryStr):
        """Parse ``queryStr`` against the "content" field and return the hits."""
        query = QueryParser("content", self.analyzer).parse(queryStr)
        return self._run_query(query)

    def __str__(self):
        return 'Motor de busqueda'
The thing I'm not sure about is thread safety. I'm confident in the
Lucene stuff (in fact I'm working on a port of Lucene to Delphi), but
I don't know what to do in the context of Django.
The rule is to have only one GLOBAL writer object, while concurrent
read operations on the index are allowed.
So, how do I do it the right way (tm)?
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"Django users" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at
http://groups.google.com/group/django-users
-~----------~----~----~----~------~----~------~--~---