Brion VIBBER has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/381733 )
Change subject: Workaround to use JVM's UTF-8 decoder in place of Xerces one
......................................................................

Workaround to use JVM's UTF-8 decoder in place of Xerces one

It's apparently still failing in some edge cases. Applying workaround
recommended on the talk page in 2013:
https://www.mediawiki.org/wiki/Manual_talk:MWDumper

Bug: T176829
Change-Id: I07cef81fa997af699ace105ae72e4fe67fc7eef0
---
M src/org/mediawiki/importer/XmlDumpReader.java
1 file changed, 7 insertions(+), 1 deletion(-)

  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/mwdumper refs/changes/33/381733/1

diff --git a/src/org/mediawiki/importer/XmlDumpReader.java b/src/org/mediawiki/importer/XmlDumpReader.java
index aa25b12..e2c33c6 100644
--- a/src/org/mediawiki/importer/XmlDumpReader.java
+++ b/src/org/mediawiki/importer/XmlDumpReader.java
@@ -27,6 +27,8 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 import java.util.Calendar;
 import java.util.GregorianCalendar;
@@ -41,6 +43,7 @@
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.InputSource;
 
 public class XmlDumpReader extends DefaultHandler {
     InputStream input;
@@ -87,8 +90,11 @@
         try {
             SAXParserFactory factory = SAXParserFactory.newInstance();
             SAXParser parser = factory.newSAXParser();
+            Reader reader = new InputStreamReader(input, "UTF-8");
+            InputSource is = new InputSource(reader);
+            is.setEncoding("UTF-8");
 
-            parser.parse(input, this);
+            parser.parse(is, this);
         } catch (ParserConfigurationException e) {
             throw (IOException)new IOException(e.getMessage()).initCause(e);
         } catch (SAXException e) {

-- 
To view, visit https://gerrit.wikimedia.org/r/381733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I07cef81fa997af699ace105ae72e4fe67fc7eef0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/mwdumper
Gerrit-Branch: master
Gerrit-Owner: Brion VIBBER <br...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits