[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Hygiene: remove extracts.js

2016-12-20 Thread BearND (Code Review)
BearND has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/327396 )

Change subject: Hygiene: remove extracts.js
..


Hygiene: remove extracts.js

Follow-up of Idc70d6e989a67421ee5af1b0d2350cfc06a3da94.
Should have removed that then.

Change-Id: I35ee543be4c9ae6d14857fefc1ef42b64f18fa34
---
D lib/extract.js
1 file changed, 0 insertions(+), 112 deletions(-)

Approvals:
  jenkins-bot: Verified
  Jdlrobson: Looks good to me, approved



diff --git a/lib/extract.js b/lib/extract.js
deleted file mode 100644
index 9142f97..0000000
--- a/lib/extract.js
+++ /dev/null
@@ -1,112 +0,0 @@
-'use strict';
-
-/**
- Article extracts
- */
-
-/**
- * @param {string} [str]
- * @return {string} str, less parenthetical expressions and their leading whitespace, if balanced.
- */
-function removeParens(str) {
-function count(paren) {
-return ((str || '').match(new RegExp(`\\${paren}`, 'g')) || []).length;
-}
-
-const openCount = count('(');
-const closeCount = count(')');
-const regex = /\s*\([^()]*\)/g;
-return openCount && openCount === closeCount ? removeParens(str.replace(regex, '')) : str;
-}
-
-/**
- * Find all matches of regex in text, calling callback with each match object
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/lineardoc/Utils.js
- *
- * @param {string} text The text to search
- * @param {Regex} regex The regex to search; should be created for this function call
- * @param {Function} callback Function to call with each match
- * @return {Array} The return values from the callback
- */
-function findAll(text, regex, callback) {
-const boundaries = [];
-do {
-const match = regex.exec(text);
-if (match === null) {
-break;
-}
-const boundary = callback(text, match);
-if (boundary !== null) {
-boundaries.push(boundary);
-}
-} while (regex.test(text));
-return boundaries;
-}
-
-/**
- * Test a possible English sentence boundary match
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/segmentation/languages/SegmenterDefault.js
- *
- * @param {string} text The plaintext to segment
- * @param {Object} match The possible boundary match (returned by regex.exec)
- * @return {number|null} The boundary offset, or null if not a sentence boundary
- */
-function findBoundary(text, match) {
-const tail = text.slice(match.index + 1, text.length);
-const head = text.slice(0, match.index);
-
-// Trailing non-final punctuation: not a sentence boundary
-if (tail.match(/^[,;:]/)) {
-return null;
-}
-// Next word character is number or lower-case: not a sentence boundary
-if (tail.match(/^\W*[0-9a-z]/)) {
-return null;
-}
-
-// Do not break in abbreviations. Example D. John, St. Peter
-const lastWord = head.match(/(\w*)$/)[0];
-// Exclude at most 2 letter abbreviations. Examples: T. Dr. St. Jr. Sr. Ms. Mr.
-// But not all caps like "UK." as in  "UK. Not US",
-if (lastWord.length <= 2 && lastWord.match(/^\W*[A-Z][a-z]?$/) && tail.match(/^\W*[A-Z]/)) {
-return null;
-}
-
-// Include any closing punctuation and trailing space
-return match.index + 1 + tail.match(/^['”"’]*\s*/)[0].length;
-}
-
-/**
- * Find English sentence boundaries
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/segmentation/languages/SegmenterDefault.js
- *
- * @param {string} text The plaintext to segment
- * @returns {number[]} Sentence boundary offsets
- */
-function getBoundaries(text) {
-// Regex to find possible English sentence boundaries.
-// Must not use a shared regex instance (re.lastIndex is used)
-return findAll(text, /[.!?]/g, findBoundary);
-}
-
-function format(extract) {
-const MAX_SENTENCES = 2;
-const cleanStr = removeParens(extract.replace(/\s+/g, ' '));
-const boundaries = getBoundaries(cleanStr);
-const cleanStrEndIndex = boundaries[Math.min(boundaries.length, MAX_SENTENCES - 1)];
-
-const ret = cleanStr.slice(0, cleanStrEndIndex).trim();
-if (ret !== '…' && ret !== '..') {
-return ret;
-}
-}
-
-module.exports = {
-format
-};

-- 
To view, visit https://gerrit.wikimedia.org/r/327396
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I35ee543be4c9ae6d14857fefc1ef42b64f18fa34
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND 
Gerrit-Reviewer: BearND 
Gerrit-Reviewer: Jdlrobson 
Gerrit-Reviewer: jenkins-bot <>

___

[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Hygiene: remove extracts.js

2016-12-14 Thread BearND (Code Review)
BearND has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/327396 )

Change subject: Hygiene: remove extracts.js
..

Hygiene: remove extracts.js

Follow-up of Idc70d6e989a67421ee5af1b0d2350cfc06a3da94.
Should have removed that then.

Change-Id: I35ee543be4c9ae6d14857fefc1ef42b64f18fa34
---
D lib/extract.js
1 file changed, 0 insertions(+), 112 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/96/327396/1

diff --git a/lib/extract.js b/lib/extract.js
deleted file mode 100644
index 9142f97..0000000
--- a/lib/extract.js
+++ /dev/null
@@ -1,112 +0,0 @@
-'use strict';
-
-/**
- Article extracts
- */
-
-/**
- * @param {string} [str]
- * @return {string} str, less parenthetical expressions and their leading whitespace, if balanced.
- */
-function removeParens(str) {
-function count(paren) {
-return ((str || '').match(new RegExp(`\\${paren}`, 'g')) || []).length;
-}
-
-const openCount = count('(');
-const closeCount = count(')');
-const regex = /\s*\([^()]*\)/g;
-return openCount && openCount === closeCount ? removeParens(str.replace(regex, '')) : str;
-}
-
-/**
- * Find all matches of regex in text, calling callback with each match object
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/lineardoc/Utils.js
- *
- * @param {string} text The text to search
- * @param {Regex} regex The regex to search; should be created for this function call
- * @param {Function} callback Function to call with each match
- * @return {Array} The return values from the callback
- */
-function findAll(text, regex, callback) {
-const boundaries = [];
-do {
-const match = regex.exec(text);
-if (match === null) {
-break;
-}
-const boundary = callback(text, match);
-if (boundary !== null) {
-boundaries.push(boundary);
-}
-} while (regex.test(text));
-return boundaries;
-}
-
-/**
- * Test a possible English sentence boundary match
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/segmentation/languages/SegmenterDefault.js
- *
- * @param {string} text The plaintext to segment
- * @param {Object} match The possible boundary match (returned by regex.exec)
- * @return {number|null} The boundary offset, or null if not a sentence boundary
- */
-function findBoundary(text, match) {
-const tail = text.slice(match.index + 1, text.length);
-const head = text.slice(0, match.index);
-
-// Trailing non-final punctuation: not a sentence boundary
-if (tail.match(/^[,;:]/)) {
-return null;
-}
-// Next word character is number or lower-case: not a sentence boundary
-if (tail.match(/^\W*[0-9a-z]/)) {
-return null;
-}
-
-// Do not break in abbreviations. Example D. John, St. Peter
-const lastWord = head.match(/(\w*)$/)[0];
-// Exclude at most 2 letter abbreviations. Examples: T. Dr. St. Jr. Sr. Ms. Mr.
-// But not all caps like "UK." as in  "UK. Not US",
-if (lastWord.length <= 2 && lastWord.match(/^\W*[A-Z][a-z]?$/) && tail.match(/^\W*[A-Z]/)) {
-return null;
-}
-
-// Include any closing punctuation and trailing space
-return match.index + 1 + tail.match(/^['”"’]*\s*/)[0].length;
-}
-
-/**
- * Find English sentence boundaries
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/segmentation/languages/SegmenterDefault.js
- *
- * @param {string} text The plaintext to segment
- * @returns {number[]} Sentence boundary offsets
- */
-function getBoundaries(text) {
-// Regex to find possible English sentence boundaries.
-// Must not use a shared regex instance (re.lastIndex is used)
-return findAll(text, /[.!?]/g, findBoundary);
-}
-
-function format(extract) {
-const MAX_SENTENCES = 2;
-const cleanStr = removeParens(extract.replace(/\s+/g, ' '));
-const boundaries = getBoundaries(cleanStr);
-const cleanStrEndIndex = boundaries[Math.min(boundaries.length, MAX_SENTENCES - 1)];
-
-const ret = cleanStr.slice(0, cleanStrEndIndex).trim();
-if (ret !== '…' && ret !== '..') {
-return ret;
-}
-}
-
-module.exports = {
-format
-};

-- 
To view, visit https://gerrit.wikimedia.org/r/327396
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I35ee543be4c9ae6d14857fefc1ef42b64f18fa34
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND 

___
MediaWiki-commits mailing list
MediaW