Mholloway has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/302288

Change subject: Replace hand-rolled HTML manipulation functions with string.js
......................................................................

Replace hand-rolled HTML manipulation functions with string.js

This library provides the same functionality in a more robust way, so
we should use it.

Change-Id: Iaa737b0ac40f40d14c92fe44bcec7f96e170f99a
---
M lib/mobile-util.js
M lib/parseDefinition.js
M package.json
M test/lib/mobile-util/mobile-util-test.js
4 files changed, 18 insertions(+), 36 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps 
refs/changes/88/302288/1

diff --git a/lib/mobile-util.js b/lib/mobile-util.js
index c6ef0e5..551df36 100644
--- a/lib/mobile-util.js
+++ b/lib/mobile-util.js
@@ -167,13 +167,6 @@
     return dateString + '/' + uuid.now().toString();
 };
 
-/**
- * Strip HTML markup from a string.
- */
-mUtil.stripMarkup = function(text) {
-    return text.replace(/<[^>]*>/g, '');
-};
-
 mUtil.throw404 = function(message) {
     throw new HTTPError({
         status: 404,
diff --git a/lib/parseDefinition.js b/lib/parseDefinition.js
index b76fa1a..53b45f5 100644
--- a/lib/parseDefinition.js
+++ b/lib/parseDefinition.js
@@ -8,6 +8,7 @@
 'use strict';
 
 var domino = require('domino');
+var string = require('string');
 var sUtil = require('./util');
 var mUtil = require('./mobile-util');
 var transforms = require('./transforms');
@@ -64,21 +65,6 @@
                   */
                 };
 
-function stripSpanTags(text) {
-    return text.replace(/<\/?span[^>]*>/g, '');
-}
-
-function stripItalicTags(text) {
-    return text.replace(/<\/?i[^>]*>/g, '');
-}
-
-function decodeHTML(text) {
-    return text.replace(/&lt;/g,'<')
-               .replace(/&gt;/g,'>')
-               .replace(/&amp;/g,'&')
-               .replace(/&nbsp;/g, ' ');
-}
-
 function hasUsageExamples(text, langCode) {
     return langCode === 'en' ? text.indexOf('<dl') > -1 : text.indexOf('<ul') 
> -1;
 }
@@ -119,17 +105,17 @@
         currentDefinition.examples = [];
         examples = wikiLangCode === 'en' ? element.querySelectorAll('dd') : 
element.querySelectorAll('li');
         for (j = 0; j < examples.length; j++) {
-            example = examples[j].innerHTML.trim();
-            example = decodeHTML(example);
-            example = stripSpanTags(example);
-            example = stripItalicTags(example);
+            example = string(examples[j].innerHTML.trim())
+                .decodeHTMLEntities()
+                .stripTags('i', 'span')
+                .s;
             currentDefinition.examples.push(example);
         }
     } else {
-        defn = element.innerHTML.trim();
-        defn = decodeHTML(defn);
-        defn = stripSpanTags(defn);
-        defn = stripItalicTags(defn);
+        defn = string(element.innerHTML.trim())
+            .decodeHTMLEntities()
+            .stripTags('i', 'span')
+            .s;
         currentDefinition.definition = defn;
     }
     return currentDefinition;
@@ -160,7 +146,7 @@
 
     for (j = 0; j < sectionDivs.length; j++) {
         currentSectionDiv = sectionDivs[j];
-        header = mUtil.stripMarkup(currentSectionDiv.title);
+        header = string(currentSectionDiv.title).stripTags().s;
 
         /* Get the language from the first H2 header, and begin iterating over 
sections.
            Per the English Wiktionary style guide (linked in header above), H2 
headings
diff --git a/package.json b/package.json
index 4a5dad2..c8085fc 100644
--- a/package.json
+++ b/package.json
@@ -46,11 +46,13 @@
     "core-js": "^2.4.1",
     "domino": "^1.0.25",
     "express": "^4.14.0",
+    "he": "^1.1.0",
     "js-yaml": "^3.6.1",
     "locutus": "^2.0.5",
     "mediawiki-title": "^0.5.4",
     "preq": "^0.4.10",
     "service-runner": "^2.0.4",
+    "string": "3.3.1",
     "swagger-router": "^0.4.6",
     "underscore": "^1.8.3"
   },
diff --git a/test/lib/mobile-util/mobile-util-test.js 
b/test/lib/mobile-util/mobile-util-test.js
index 9027296..ccc198e 100644
--- a/test/lib/mobile-util/mobile-util-test.js
+++ b/test/lib/mobile-util/mobile-util-test.js
@@ -2,6 +2,7 @@
 
 var assert = require('../../utils/assert');
 var mUtil = require('../../../lib/mobile-util');
+var string = require('string');
 var preq  = require('preq');
 
 var obj1 = { hello: true, world: true };
@@ -56,10 +57,10 @@
         assert.deepEqual(arr3, arr4);
     })
 
-    it('HTML stripping regex should strip HTML as expected', function() {
-        assert.deepEqual(mUtil.stripMarkup(newsHtml1).trim(), newsText1);
-        assert.deepEqual(mUtil.stripMarkup(newsHtml2).trim(), newsText2);
-        assert.deepEqual(mUtil.stripMarkup(newsHtml3).trim(), newsText3);
-        assert.deepEqual(mUtil.stripMarkup(newsHtml4).trim(), newsText4);
+    it('String.js should strip HTML tags as expected', function() {
+        assert.deepEqual(string(newsHtml1).stripTags().s.trim(), newsText1);
+        assert.deepEqual(string(newsHtml2).stripTags().s.trim(), newsText2);
+        assert.deepEqual(string(newsHtml3).stripTags().s.trim(), newsText3);
+        assert.deepEqual(string(newsHtml4).stripTags().s.trim(), newsText4);
     })
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/302288
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaa737b0ac40f40d14c92fe44bcec7f96e170f99a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Mholloway <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to