Brion VIBBER has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/381733 )

Change subject: Workaround to use JVM's UTF-8 decoder in place of Xerces one
......................................................................

Workaround to use JVM's UTF-8 decoder in place of Xerces one

The Xerces UTF-8 decoder is apparently still failing in some edge cases.
Applying workaround recommended on the talk page in 2013:
https://www.mediawiki.org/wiki/Manual_talk:MWDumper

Bug: T176829
Change-Id: I07cef81fa997af699ace105ae72e4fe67fc7eef0
---
M src/org/mediawiki/importer/XmlDumpReader.java
1 file changed, 7 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/mwdumper refs/changes/33/381733/1

diff --git a/src/org/mediawiki/importer/XmlDumpReader.java b/src/org/mediawiki/importer/XmlDumpReader.java
index aa25b12..e2c33c6 100644
--- a/src/org/mediawiki/importer/XmlDumpReader.java
+++ b/src/org/mediawiki/importer/XmlDumpReader.java
@@ -27,6 +27,8 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 import java.util.Calendar;
 import java.util.GregorianCalendar;
@@ -41,6 +43,7 @@
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.InputSource;
 
 public class XmlDumpReader  extends DefaultHandler {
        InputStream input;
@@ -87,8 +90,11 @@
                try {
                        SAXParserFactory factory = 
SAXParserFactory.newInstance();
                        SAXParser parser = factory.newSAXParser();
+                       Reader reader = new InputStreamReader(input, "UTF-8");
+                       InputSource is = new InputSource(reader);
+                       is.setEncoding("UTF-8");
        
-                       parser.parse(input, this);
+                       parser.parse(is, this);
                } catch (ParserConfigurationException e) {
                        throw (IOException)new 
IOException(e.getMessage()).initCause(e);
                } catch (SAXException e) {

-- 
To view, visit https://gerrit.wikimedia.org/r/381733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I07cef81fa997af699ace105ae72e4fe67fc7eef0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/mwdumper
Gerrit-Branch: master
Gerrit-Owner: Brion VIBBER <br...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to