jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/385420 )

Change subject: Hygiene: create script to compare old and new sectioning code
......................................................................


Hygiene: create script to compare old and new sectioning code

The output of this allows us to have more confidence when changing
the sectioning code. It could probably be used for other changes, too.
This script requires some extensive setup. See the comments at the
beginning of the script file.

Bug: T178707
Change-Id: I0a8116ec8fe9278a31e8dea781ee646251b83c94
---
M .eslintignore
M .gitignore
A scripts/compare-sections.js
3 files changed, 144 insertions(+), 0 deletions(-)

Approvals:
  jenkins-bot: Verified
  Mholloway: Looks good to me, approved



diff --git a/.eslintignore b/.eslintignore
index 9065f89..1b5a54d 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -1,3 +1,4 @@
+private/compare-sections/*.json
 private/top-pages/top-pages.*.json
 test/diff/results/page_definition-enwiktionary-*.json
 test/diff/results/page_*MCS_Test_Frankenstein.json
diff --git a/.gitignore b/.gitignore
index 28e4a2d..bb8bd8a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@
 .DS_Store
 tmp/
 /fixtures/
+/private/compare-sections/
diff --git a/scripts/compare-sections.js b/scripts/compare-sections.js
new file mode 100755
index 0000000..62f636e
--- /dev/null
+++ b/scripts/compare-sections.js
@@ -0,0 +1,142 @@
+#!/usr/bin/env node
+
+'use strict';
+
+/*
+  Setup notes before running this script:
+  * Start two local Parsoid instances on ports 8000 and 8001. The latter would get the new code.
+  * Start two local MCS instances on ports 6927 and 6928.
+  * Change the config.dev.yaml also to hook up with the respective local Parsoid installations, e.g.
+  *     v1) MCS:6927 -> Parsoid:8000
+  *     v2) MCS:6928 -> Parsoid:8001
+  * and towards the end of the config.dev.yaml also change the restbase_req uri value to
+  *     v1) uri: http://0.0.0.0:8000/{{domain}}/v3/{+path}
+  *     v2) uri: http://0.0.0.0:8001/{{domain}}/v3/{+path}
+  * Run the script from the script folder.
+
+  Arguments: provide a single argument which is the language code for the Wikipedia project.
+
+  Example:
+  $ cd scripts
+  $ ./compare-sections.js en
+
+  The output will be in the private/compare-sections folder. Since the output is much larger than
+  for text extracts each page gets its own file.
+  Note: the output will be massaged to allow for easier diffing by reducing uninteresting variances
+  and by adding line breaks at strategic points.
+*/
+
+const BBPromise = require('bluebird');
+const fs = require('fs');
+const mkdir = require('mkdirp');
+const preq = require('preq');
+
+const DELAY = 10; // delay between requests in ms
+const topPagesDir = '../private/top-pages';
+const outDir = '../private/compare-sections';
+
+let lang;
+let topPages;
+
+let oldDirName;
+let newDirName;
+
+const uriForOldMobileSections = (title, rev, lang) => {
+    return `http://localhost:6927/${lang}.wikipedia.org/v1/page/mobile-sections/${encodeURIComponent(title)}/${rev}`;
+};
+
+const uriForNewSections = (title, rev, lang) => {
+    return `http://localhost:6928/${lang}.wikipedia.org/v1/page/mobile-sections/${encodeURIComponent(title)}/${rev}`;
+};
+
+/**
+ * Remove some values which vary between implementations but don't have anything to do with
+ * sectioning.
+ */
+const simplifyExtractValue = (value) => {
+    return value && value
+        .replace(/"revision": "\w+",/, '"revision": "ZZZ",')
+        .replace(/"lastmodified": "\w+",/, '"lastmodified": "ZZZ",')
+        .replace(/"user": "\w+",/, '"user": "ZZZ",')
+        .replace(/"gender": "\w+",/, '"gender": "ZZZ",')
+        .replace(/#ImageMap_\d+_\d+/g, '#ImageMap_0_000')
+        .replace(/<img src="\/\//g, '<img src="https://')
+        .replace(/ srcset=\\".+?\\"/g, '')
+        .replace(/ class=\\"mw-redirect\\"/g, '')
+        .replace(/ id=\\"mw[-\w]+\\"/g, '')
+        .replace(/#mwt\d{1,4}/g, '#mwt000')
+        .replace(/ data-mw=\\"\\.+?\\}\\"/g, ' data-mw=\"{}\"')
+        // break lines for easier diffing:
+        .replace(/(<h\d)/g, "\n$1")
+        .replace(/(<\/h\d>)/g, "$1\n")
+        .replace(/(<section)/g, "\n$1")
+        .replace(/(<\/section>)/g, "$1\n")
+        .replace(/(.{50}[^<>]{0,50}>?)/g, "$1\n")
+        // ^ keep lines to a reasonable width (try to break near HTML tags)
+        ;
+};
+
+const getExtractHtml = (response) => {
+    if (response.status !== 200) {
+        return `!! STATUS = ${response.status} !!\n`;
+    }
+    return simplifyExtractValue(JSON.stringify(response.body, null, 2));
+};
+
+const writeFile = (dir, title, rev, value) => {
+    const file = fs.createWriteStream(`${dir}_${encodeURIComponent(title)}-${rev}.json`,
+        { flags: 'w' });
+    file.write(`${value}\n`);
+    file.end();
+};
+
+const compareExtracts = (filePrefix, oldExtract, newExtract, counter, title, rev) => {
+    writeFile(`${oldDirName}/${filePrefix}`, title, rev, oldExtract);
+    writeFile(`${newDirName}/${filePrefix}`, title, rev, newExtract);
+};
+
+const fetchExtract = (uri) => {
+    return preq.get({ uri })
+    .then((response) => {
+        return BBPromise.delay(DELAY, getExtractHtml(response));
+    }).catch((err) => {
+        return BBPromise.resolve(`!!! ${err} "${uri}" !!!`);
+    });
+};
+
+const fetchAndVerify = (filePrefix, title, rev, counter, lang) => {
+    process.stdout.write('.');
+    let newExtract;
+    return fetchExtract(uriForNewSections(title, rev, lang))
+    .then((response) => {
+        newExtract = response;
+        return fetchExtract(uriForOldMobileSections(title, rev, lang));
+    }).then((oldExtract) => {
+        compareExtracts(filePrefix, oldExtract, newExtract, counter, title, rev);
+    });
+};
+
+const processOneLanguage = (lang) => {
+    let counter = 0;
+    BBPromise.each(topPages, (page) => {
+        const filePrefix = (`0000${++counter}`).slice(-4); // 0-pad
+        return fetchAndVerify(filePrefix, page.title, page.rev.split('/', 1)[0], counter, lang);
+    });
+};
+
+// MAIN
+const arg = process.argv[2];
+if (arg) {
+    lang = arg;
+    topPages = require(`${topPagesDir}/top-pages.${lang}.json`).items;
+    oldDirName = `${outDir}/v1/${lang}`;
+    newDirName = `${outDir}/v2/${lang}`;
+
+    mkdir.sync(oldDirName);
+    mkdir.sync(newDirName);
+
+    processOneLanguage(arg);
+} else {
+    process.stderr.write(`Error: supply one language parameter (e.g. en)!\n`);
+    process.exit(-1);
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/385420
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0a8116ec8fe9278a31e8dea781ee646251b83c94
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND <bsitzm...@wikimedia.org>
Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org>
Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org>
Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org>
Gerrit-Reviewer: Mhurd <mh...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to