Author: ogrisel
Date: Wed Mar 28 10:01:08 2012
New Revision: 1306237
URL: http://svn.apache.org/viewvc?rev=1306237&view=rev
Log:
STANBOL-197: less verbose output for the newsml batch importer script
Modified:
incubator/stanbol/trunk/enhancer/topic-web/tools/newsmlimporter.py
Modified: incubator/stanbol/trunk/enhancer/topic-web/tools/newsmlimporter.py
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/topic-web/tools/newsmlimporter.py?rev=1306237&r1=1306236&r2=1306237&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/topic-web/tools/newsmlimporter.py
(original)
+++ incubator/stanbol/trunk/enhancer/topic-web/tools/newsmlimporter.py Wed Mar
28 10:01:08 2012
@@ -22,6 +22,7 @@ TODO: port to Python 3 as well if not wo
from __future__ import print_function
import os
+from time import time
from lxml import html
from lxml import etree
from urllib import quote
@@ -61,11 +62,10 @@ def register_newsml_document(text, codes
url += "?example_id=%s" % id
for code in codes:
url += "&concept=%s" % quote(code)
- print("Calling:", url)
request = urllib2.Request(url, data=text.encode('utf-8'))
request.add_header('Content-Type', 'text/plain')
opener = urllib2.build_opener()
- print(opener.open(request).read())
+ opener.open(request).read()
def print_newsml_summary(text, codes, server_url=None):
@@ -86,6 +86,7 @@ if __name__ == "__main__":
handle_news = register_newsml_document
count = 0
+ previous = time()
for dirpath, dirnames, filenames in os.walk(topfolder):
if count >= max:
break
@@ -106,3 +107,7 @@ if __name__ == "__main__":
continue
handle_news(text, codes, server_url)
count += 1
+ if count % 100 == 0:
+ delta, previous = time() - previous, time()
+ print("Processed news %03d/%03d in %06.3fs"
+ % (count, max, delta))