jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/385420 )
Change subject: Hygiene: create script to compare old and new sectioning code ...................................................................... Hygiene: create script to compare old and new sectioning code The output of this allows us to have more confidence when changing the sectioning code. It could probably be used for other changes, too. This script requires some extensive setup. See the comments at the beginning of the script file. Bug: T178707 Change-Id: I0a8116ec8fe9278a31e8dea781ee646251b83c94 --- M .eslintignore M .gitignore A scripts/compare-sections.js 3 files changed, 144 insertions(+), 0 deletions(-) Approvals: jenkins-bot: Verified Mholloway: Looks good to me, approved diff --git a/.eslintignore b/.eslintignore index 9065f89..1b5a54d 100644 --- a/.eslintignore +++ b/.eslintignore @@ -1,3 +1,4 @@ +private/compare-sections/*.json private/top-pages/top-pages.*.json test/diff/results/page_definition-enwiktionary-*.json test/diff/results/page_*MCS_Test_Frankenstein.json diff --git a/.gitignore b/.gitignore index 28e4a2d..bb8bd8a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ .DS_Store tmp/ /fixtures/ +/private/compare-sections/ diff --git a/scripts/compare-sections.js b/scripts/compare-sections.js new file mode 100755 index 0000000..62f636e --- /dev/null +++ b/scripts/compare-sections.js @@ -0,0 +1,142 @@ +#!/usr/bin/env node + +'use strict'; + +/* + Setup notes before running this script: + * Start two local Parsoid instances on ports 8000 and 8001. The latter would get the new code. + * Start two local MCS instances on ports 6927 and 6928. + * Change the config.dev.yaml also to hook up with the respective local Parsoid installations, e.g. 
+ * v1) MCS:6927 -> Parsoid:8000 + * v2) MCS:6928 -> Parsoid:8001 + * and towards the end of the config.dev.yaml also change the restbase_req uri value to + * v1) uri: http://0.0.0.0:8000/{{domain}}/v3/{+path} + * v2) uri: http://0.0.0.0:8001/{{domain}}/v3/{+path} + * Run the script from the scripts folder. + + Arguments: provide a single argument which is the language code for the Wikipedia project. + + Example: + $ cd scripts + $ ./compare-sections.js en + + The output will be in the private/compare-sections folder. Since the output is much larger than + for text extracts each page gets its own file. + Note: the output will be massaged to allow for easier diffing by reducing uninteresting variances + and by adding line breaks at strategic points. +*/ + +const BBPromise = require('bluebird'); +const fs = require('fs'); +const mkdir = require('mkdirp'); +const preq = require('preq'); + +const DELAY = 10; // delay between requests in ms +const topPagesDir = '../private/top-pages'; +const outDir = '../private/compare-sections'; + +let lang; +let topPages; + +let oldDirName; +let newDirName; + +const uriForOldMobileSections = (title, rev, lang) => { + return `http://localhost:6927/${lang}.wikipedia.org/v1/page/mobile-sections/${encodeURIComponent(title)}/${rev}`; +}; + +const uriForNewSections = (title, rev, lang) => { + return `http://localhost:6928/${lang}.wikipedia.org/v1/page/mobile-sections/${encodeURIComponent(title)}/${rev}`; +}; + +/** + * Remove some values which vary between implementations but don't have anything to do with + * sectioning. 
+ */ +const simplifyExtractValue = (value) => { + return value && value + .replace(/"revision": "\w+",/, '"revision": "ZZZ",') + .replace(/"lastmodified": "\w+",/, '"lastmodified": "ZZZ",') + .replace(/"user": "\w+",/, '"user": "ZZZ",') + .replace(/"gender": "\w+",/, '"gender": "ZZZ",') + .replace(/#ImageMap_\d+_\d+/g, '#ImageMap_0_000') + .replace(/<img src="\/\//g, '<img src="https://') + .replace(/ srcset=\\".+?\\"/g, '') + .replace(/ class=\\"mw-redirect\\"/g, '') + .replace(/ id=\\"mw[-\w]+\\"/g, '') + .replace(/#mwt\d{1,4}/g, '#mwt000') + .replace(/ data-mw=\\"\\.+?\\}\\"/g, ' data-mw=\"{}\"') + // break lines for easier diffing: + .replace(/(<h\d)/g, "\n$1") + .replace(/(<\/h\d>)/g, "$1\n") + .replace(/(<section)/g, "\n$1") + .replace(/(<\/section>)/g, "$1\n") + .replace(/(.{50}[^<>]{0,50}>?)/g, "$1\n") + // ^ keep lines to a reasonable width (try to break near HTML tags) + ; +}; + +const getExtractHtml = (response) => { + if (response.status !== 200) { + return `!! STATUS = ${response.status} !!\n`; + } + return simplifyExtractValue(JSON.stringify(response.body, null, 2)); +}; + +const writeFile = (dir, title, rev, value) => { + const file = fs.createWriteStream(`${dir}_${encodeURIComponent(title)}-${rev}.json`, + { flags: 'w' }); + file.write(`${value}\n`); + file.end(); +}; + +const compareExtracts = (filePrefix, oldExtract, newExtract, counter, title, rev) => { + writeFile(`${oldDirName}/${filePrefix}`, title, rev, oldExtract); + writeFile(`${newDirName}/${filePrefix}`, title, rev, newExtract); +}; + +const fetchExtract = (uri) => { + return preq.get({ uri }) + .then((response) => { + return BBPromise.delay(DELAY, getExtractHtml(response)); + }).catch((err) => { + return BBPromise.resolve(`!!! 
${err} "${uri}" !!!`); + }); +}; + +const fetchAndVerify = (filePrefix, title, rev, counter, lang) => { + process.stdout.write('.'); + let newExtract; + return fetchExtract(uriForNewSections(title, rev, lang)) + .then((response) => { + newExtract = response; + return fetchExtract(uriForOldMobileSections(title, rev, lang)); + }).then((oldExtract) => { + compareExtracts(filePrefix, oldExtract, newExtract, counter, title, rev); + }); +}; + +const processOneLanguage = (lang) => { + let counter = 0; + BBPromise.each(topPages, (page) => { + const filePrefix = (`0000${++counter}`).slice(-4); // 0-pad + return fetchAndVerify(filePrefix, page.title, page.rev.split('/', 1)[0], counter, lang); + }); +}; + +// MAIN +const arg = process.argv[2]; +if (arg) { + lang = arg; + topPages = require(`${topPagesDir}/top-pages.${lang}.json`).items; + oldDirName = `${outDir}/v1/${lang}`; + newDirName = `${outDir}/v2/${lang}`; + + mkdir.sync(oldDirName); + mkdir.sync(newDirName); + + processOneLanguage(arg); +} else { + process.stderr.write(`Error: supply one language parameter (e.g. 
en)!\n`); + process.exit(-1); +} -- To view, visit https://gerrit.wikimedia.org/r/385420 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I0a8116ec8fe9278a31e8dea781ee646251b83c94 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/services/mobileapps Gerrit-Branch: master Gerrit-Owner: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: BearND <bsitzm...@wikimedia.org> Gerrit-Reviewer: Dbrant <dbr...@wikimedia.org> Gerrit-Reviewer: Fjalapeno <cfl...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Mholloway <mhollo...@wikimedia.org> Gerrit-Reviewer: Mhurd <mh...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits