I've written an XML parser using xml.sax which works most of the time,
but it often fails with an exception when trying to read a file. The
traceback is below:
Traceback (most recent call last):
File /usr/lib/python2.4/site-packages/cherrypy/_cphttptools.py,
line 271, in run
main()
File /usr/lib/python2.4/site-packages/cherrypy/_cphttptools.py,
line 502, in main
body = page_handler(*args, **cherrypy.request.paramMap)
File netscan.py, line 160, in index
parse()
File netscan.py, line 117, in parse
parser.parse ( scan_results )
File /usr/lib/python2.4/xml/sax/expatreader.py, line 107, in parse
xmlreader.IncrementalParser.parse(self, source)
File /usr/lib/python2.4/xml/sax/xmlreader.py, line 125, in parse
self.close()
File /usr/lib/python2.4/xml/sax/expatreader.py, line 217, in close
self.feed("", isFinal = 1)
File /usr/lib/python2.4/xml/sax/expatreader.py, line 211, in feed
self._err_handler.fatalError(exc)
File /usr/lib/python2.4/xml/sax/handler.py, line 38, in fatalError
raise exception
SAXParseException: /var/log/netscan/scanresults.txt:8:0: no element found
I don't understand why it's telling me that no element is found. It
looks like a problem inside xml.sax, but I'm not sure whether I've caused
it, or how. The XML file is good and is generated by nmap; it isn't
missing any tags and it is quite small. The script that produced this
traceback is below:
#!/usr/bin/env python
import xml.sax
import sys
import os
# Location of the nmap output that this script reads.
# NOTE: `dir` shadows the builtin of the same name; the name is kept so that
# any other code referring to it keeps working.
dir = '/var/log/netscan'
scan = 'scanresults.txt'
temp = 'tempscan.txt'
scan_results = os.path.join(dir, scan)
temp_results = os.path.join(dir, temp)

# Refuse to start when the results directory is missing.
if not os.path.isdir(dir):
    sys.exit('%s does not exist! exiting...' % dir)

# One dict per scanned host, appended by scanparser.endElement().
# Each record has the keys:
#   'status', 'address', 'hostname'            -> strings
#   'port', 'protocol', 'service', 'state',
#   'product', 'version', 'extrainfo'          -> lists of strings
network = []
class scanparser(xml.sax.ContentHandler):
    """SAX content handler that collects per-host results from nmap XML.

    Attributes of the elements seen while a host is being read are gathered
    into ``self.host``.  When a ``</host>`` end tag arrives, the record is
    appended to the module-level ``network`` list and a fresh, empty record
    is started.  The scan start and finish times are stored in the
    module-level globals ``scan_start`` and ``scan_stop``.
    """

    def __init__(self):
        # Explicit base-class call; this form works on Python 2 and Python 3.
        xml.sax.ContentHandler.__init__(self)
        self.host = self._new_host()

    @staticmethod
    def _new_host():
        """Return an empty host record with every expected key present."""
        return {
            'status': '',
            'address': '',
            'hostname': '',
            'port': [],
            'protocol': [],
            'service': [],
            'state': [],
            'product': [],
            'version': [],
            'extrainfo': [],
        }

    def startElement(self, name, attributes):
        """Record the attributes of interest from an opening tag.

        ``attributes.getValue()`` raises ``KeyError`` when the requested
        attribute is absent.  The optional ``<service>`` attributes
        (product, version, extrainfo) default to the empty string instead.
        """
        global scan_start, scan_stop
        host = self.host
        if name == 'nmaprun':
            scan_start = attributes.getValue('startstr')
        elif name == 'finished':
            scan_stop = attributes.getValue('timestr')
        elif name == 'status':
            host['status'] = attributes.getValue('state')
        elif name == 'address':
            # Only the IPv4 address is kept; other address types are ignored.
            if attributes.getValue('addrtype') == 'ipv4':
                host['address'] = attributes.getValue('addr')
        elif name == 'hostname':
            host['hostname'] = attributes.getValue('name')
        elif name == 'port':
            host['port'].append(attributes.getValue('portid'))
            host['protocol'].append(attributes.getValue('protocol'))
        elif name == 'service':
            host['service'].append(attributes.getValue('name'))
            # Always append something for the optional attributes so the
            # four service-related lists stay index-aligned.
            for key in ('product', 'version', 'extrainfo'):
                host[key].append(attributes.get(key, ''))
        elif name == 'state':
            host['state'].append(attributes.getValue('state'))

    def endElement(self, name):
        """Store the finished host record and start a new one on ``</host>``."""
        if name == 'host':
            network.append(self.host)
            # Rebind to a brand-new record so the stored one is not mutated.
            self.host = self._new_host()
def parse():
    """Parse the scan results file and rebuild the global ``network`` list.

    A new SAX parser and a new ``scanparser`` handler are created on every
    call, and ``network`` is reset to an empty list just before parsing.

    Any exception raised by the SAX parser propagates to the caller; the
    traceback in this post shows ``SAXParseException`` ("no element found")
    coming from this call.
    NOTE(review): expat reports "no element found" when the document ends
    before any or all elements are complete -- presumably the file can be
    read while nmap is still writing it; confirm against the caller.
    """
    global network
    reader = xml.sax.make_parser()
    handler = scanparser()
    reader.setContentHandler(handler)
    # Drop the results of any previous run before the handler starts
    # appending new host records.
    network = []
    reader.parse(scan_results)