Mholloway has uploaded a new change for review.
https://gerrit.wikimedia.org/r/302288
Change subject: Replace hand-rolled HTML manipulation functions with string.js
......................................................................
Replace hand-rolled HTML manipulation functions with string.js
This library provides the same functionality in a more robust way, so
we should use it.
Change-Id: Iaa737b0ac40f40d14c92fe44bcec7f96e170f99a
---
M lib/mobile-util.js
M lib/parseDefinition.js
M package.json
M test/lib/mobile-util/mobile-util-test.js
4 files changed, 18 insertions(+), 36 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps
refs/changes/88/302288/1
diff --git a/lib/mobile-util.js b/lib/mobile-util.js
index c6ef0e5..551df36 100644
--- a/lib/mobile-util.js
+++ b/lib/mobile-util.js
@@ -167,13 +167,6 @@
return dateString + '/' + uuid.now().toString();
};
-/**
- * Strip HTML markup from a string.
- */
-mUtil.stripMarkup = function(text) {
- return text.replace(/<[^>]*>/g, '');
-};
-
mUtil.throw404 = function(message) {
throw new HTTPError({
status: 404,
diff --git a/lib/parseDefinition.js b/lib/parseDefinition.js
index b76fa1a..53b45f5 100644
--- a/lib/parseDefinition.js
+++ b/lib/parseDefinition.js
@@ -8,6 +8,7 @@
'use strict';
var domino = require('domino');
+var string = require('string');
var sUtil = require('./util');
var mUtil = require('./mobile-util');
var transforms = require('./transforms');
@@ -64,21 +65,6 @@
*/
};
-function stripSpanTags(text) {
- return text.replace(/<\/?span[^>]*>/g, '');
-}
-
-function stripItalicTags(text) {
- return text.replace(/<\/?i[^>]*>/g, '');
-}
-
-function decodeHTML(text) {
- return text.replace(/</g,'<')
- .replace(/>/g,'>')
- .replace(/&/g,'&')
- .replace(/ /g, ' ');
-}
-
function hasUsageExamples(text, langCode) {
return langCode === 'en' ? text.indexOf('<dl') > -1 : text.indexOf('<ul')
> -1;
}
@@ -119,17 +105,17 @@
currentDefinition.examples = [];
examples = wikiLangCode === 'en' ? element.querySelectorAll('dd') :
element.querySelectorAll('li');
for (j = 0; j < examples.length; j++) {
- example = examples[j].innerHTML.trim();
- example = decodeHTML(example);
- example = stripSpanTags(example);
- example = stripItalicTags(example);
+ example = string(examples[j].innerHTML.trim())
+ .decodeHTMLEntities()
+ .stripTags('i', 'span')
+ .s;
currentDefinition.examples.push(example);
}
} else {
- defn = element.innerHTML.trim();
- defn = decodeHTML(defn);
- defn = stripSpanTags(defn);
- defn = stripItalicTags(defn);
+ defn = string(element.innerHTML.trim())
+ .decodeHTMLEntities()
+ .stripTags('i', 'span')
+ .s;
currentDefinition.definition = defn;
}
return currentDefinition;
@@ -160,7 +146,7 @@
for (j = 0; j < sectionDivs.length; j++) {
currentSectionDiv = sectionDivs[j];
- header = mUtil.stripMarkup(currentSectionDiv.title);
+ header = string(currentSectionDiv.title).stripTags().s;
/* Get the language from the first H2 header, and begin iterating over
sections.
Per the English Wiktionary style guide (linked in header above), H2
headings
diff --git a/package.json b/package.json
index 4a5dad2..c8085fc 100644
--- a/package.json
+++ b/package.json
@@ -46,11 +46,13 @@
"core-js": "^2.4.1",
"domino": "^1.0.25",
"express": "^4.14.0",
+ "he": "^1.1.0",
"js-yaml": "^3.6.1",
"locutus": "^2.0.5",
"mediawiki-title": "^0.5.4",
"preq": "^0.4.10",
"service-runner": "^2.0.4",
+ "string": "3.3.1",
"swagger-router": "^0.4.6",
"underscore": "^1.8.3"
},
diff --git a/test/lib/mobile-util/mobile-util-test.js
b/test/lib/mobile-util/mobile-util-test.js
index 9027296..ccc198e 100644
--- a/test/lib/mobile-util/mobile-util-test.js
+++ b/test/lib/mobile-util/mobile-util-test.js
@@ -2,6 +2,7 @@
var assert = require('../../utils/assert');
var mUtil = require('../../../lib/mobile-util');
+var string = require('string');
var preq = require('preq');
var obj1 = { hello: true, world: true };
@@ -56,10 +57,10 @@
assert.deepEqual(arr3, arr4);
})
- it('HTML stripping regex should strip HTML as expected', function() {
- assert.deepEqual(mUtil.stripMarkup(newsHtml1).trim(), newsText1);
- assert.deepEqual(mUtil.stripMarkup(newsHtml2).trim(), newsText2);
- assert.deepEqual(mUtil.stripMarkup(newsHtml3).trim(), newsText3);
- assert.deepEqual(mUtil.stripMarkup(newsHtml4).trim(), newsText4);
+ it('String.js should strip HTML tags as expected', function() {
+ assert.deepEqual(string(newsHtml1).stripTags().s.trim(), newsText1);
+ assert.deepEqual(string(newsHtml2).stripTags().s.trim(), newsText2);
+ assert.deepEqual(string(newsHtml3).stripTags().s.trim(), newsText3);
+ assert.deepEqual(string(newsHtml4).stripTags().s.trim(), newsText4);
})
});
--
To view, visit https://gerrit.wikimedia.org/r/302288
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaa737b0ac40f40d14c92fe44bcec7f96e170f99a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits