Nilesh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/72626


Change subject: Made corrections to wikiparser scripts and added reducer
......................................................................

Made corrections to wikiparser scripts and added reducer

Change-Id: Iaa9c8ca99cab30fe5ebd7724ef35fecdc8797b93
---
M wikiparser/wikiparser.py
M wikiparser/wikiparser_db.py
A wikiparser/wikiparser_r.py
3 files changed, 20 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikidataEntitySuggester refs/changes/26/72626/1

diff --git a/wikiparser/wikiparser.py b/wikiparser/wikiparser.py
index 02653d6..aab0f53 100644
--- a/wikiparser/wikiparser.py
+++ b/wikiparser/wikiparser.py
@@ -5,7 +5,6 @@
 import json
 import sys
 
-count = 0
 page = ''
 
 def main():
@@ -42,14 +41,15 @@
                             toyield1 = str(statement['value'])
                             value = 
str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in 
statement else statement['string']
                             toyield2 = str(statement['value']) + "----" + value
-                            sys.stdout.write(toyield1.encode("utf-8", 
'ignore') + "\n")
-                            sys.stdout.write(toyield2.encode("utf-8", 
'ignore') + "\n")
+                            sys.stdout.write("||\t" + str(title) + "," + 
toyield1.encode("utf-8", 'ignore') + "\n")
+                            sys.stdout.write("||\t" + str(title) + "," + 
toyield2.encode("utf-8", 'ignore') + "\n")
+                            sys.stdout.write("$$\t" + toyield1.encode("utf-8", 
'ignore') + "\n")
+                            sys.stdout.write("$$\t" + toyield2.encode("utf-8", 
'ignore') + "\n")
                         except KeyError:
                             pass
     except (KeyError, ValueError, TypeError) as e:
         sys.stderr.write("Error occurred for page : " + str(title) + ", ns = " 
+ str(page['ns']) + "\n")
         sys.stderr.write(traceback.format_exc() + "\n")
-
 
 if __name__ == '__main__':
     main()
diff --git a/wikiparser/wikiparser_db.py b/wikiparser/wikiparser_db.py
index be0c6b0..953cda1 100644
--- a/wikiparser/wikiparser_db.py
+++ b/wikiparser/wikiparser_db.py
@@ -14,7 +14,7 @@
     con = None
     cur = None
     try:
-        con = mdb.connect('localhost', 'root', 'orangetail', 'wikidatawiki');
+        con = mdb.connect('localhost', 'root', 'password', 'wikidatawiki');
         cur = con.cursor()
         cur.execute("SET FOREIGN_KEY_CHECKS = 0")
         cur.execute("SET UNIQUE_CHECKS = 0")
@@ -66,8 +66,10 @@
                             toyield1 = str(statement['value'])
                             value = 
str(statement['wikibase-entityid']['numeric-id']) if 'wikibase-entityid' in 
statement else statement['string']
                             toyield2 = str(statement['value']) + "----" + value
-                            sys.stdout.write(toyield1.encode("utf-8", 
'ignore') + "\n")
-                            sys.stdout.write(toyield2.encode("utf-8", 
'ignore') + "\n")
+                            sys.stdout.write("||\t" + str(title) + "," + 
toyield1.encode("utf-8", 'ignore') + "\n")
+                            sys.stdout.write("||\t" + str(title) + "," + 
toyield2.encode("utf-8", 'ignore') + "\n")
+                            sys.stdout.write("$$\t" + toyield1.encode("utf-8", 
'ignore') + "\n")
+                            sys.stdout.write("$$\t" + toyield2.encode("utf-8", 
'ignore') + "\n")
                         except KeyError:
                             pass
         elif page['ns'] == '120':
diff --git a/wikiparser/wikiparser_r.py b/wikiparser/wikiparser_r.py
new file mode 100644
index 0000000..52604d4
--- /dev/null
+++ b/wikiparser/wikiparser_r.py
@@ -0,0 +1,11 @@
+#!/usr/bin/python
+
+import sys
+
+def main():
+    for i in sys.stdin:
+        (key, value) = i.split("\t")
+        sys.stdout.write(value + "\n")
+        
+if __name__ == '__main__':
+    main()

-- 
To view, visit https://gerrit.wikimedia.org/r/72626
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaa9c8ca99cab30fe5ebd7724ef35fecdc8797b93
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikidataEntitySuggester
Gerrit-Branch: master
Gerrit-Owner: Nilesh <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to