[MediaWiki-commits] [Gerrit] Lineardoc: Do not skip tags with empty content but with attr... - change (mediawiki...cxserver)

2015-07-06 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged.

Change subject: Lineardoc: Do not skip tags with empty content but with 
attributes
..


Lineardoc: Do not skip tags with empty content but with attributes

Those attributes are important and sometimes contain information for
parsoid to work with references or templates.

Added tests to verify lineardoc is not eating up empty tags

While fixing this, another bug noticed. isReference method in
Utils.js was not considering references in sup tag. This was causing
the links inside sup treated as usual page titles. Fixed that too
so that the tests pass.

Bug: T104539
Change-Id: I4cca925cb35a00fa27fd31380fae90e1c4c2767f
---
M lineardoc/Builder.js
M lineardoc/Parser.js
M lineardoc/Utils.js
M tests/lineardoc/LinearDoc.test.js
A tests/lineardoc/data/test3-result.xhtml
A tests/lineardoc/data/test3-result.xml
A tests/lineardoc/data/test3.xhtml
M tests/segmentation/data/result-19.html
M tests/segmentation/data/result-22.html
9 files changed, 97 insertions(+), 9 deletions(-)

Approvals:
  Nikerabbit: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lineardoc/Builder.js b/lineardoc/Builder.js
index 15f806c..8874681 100644
--- a/lineardoc/Builder.js
+++ b/lineardoc/Builder.js
@@ -58,7 +58,14 @@
'Mismatched inline tags: open=' + ( tag  tag.name ) + 
', close=' + tagName
);
}
-   if ( tag.name !== 'span' || !tag.attributes[ 'data-mw' ] ) {
+
+   if ( !Object.keys( tag.attributes ).length ) {
+   // Skip tags which have attributes, content or both from 
further check to
+   // see if we want to keep inline content. Else we assume that, 
if this tag has
+   // whitespace or empty content, it is ok to remove it from 
resulting document.
+   // But if it has attributes, we make sure that there are inline 
content blocks to
+   // avoid them missing in resulting document.
+   // See T104539
return tag;
}
// Check for empty/whitespace-only data span. Replace as inline content
diff --git a/lineardoc/Parser.js b/lineardoc/Parser.js
index d541cc3..d734f70 100644
--- a/lineardoc/Parser.js
+++ b/lineardoc/Parser.js
@@ -86,9 +86,9 @@
this.builder.popBlockTag( 'div' );
}
} else if ( isAnn  this.builder.parent !== null ) {
-   // In a sub document: should be a span that closes a reference
-   if ( tagName !== 'span' ) {
-   throw new Error( 'Expected close reference span, got ' 
+ tagName + '' );
+   // In a sub document: should be a span or sup that closes a 
reference
+   if ( tagName !== 'span'  tagName !== 'sup' ) {
+   throw new Error( 'Expected close reference - span or 
sup tags, got ' + tagName + '' );
}
this.builder.finishTextBlock();
this.builder.parent.addInlineContent( this.builder.doc );
diff --git a/lineardoc/Utils.js b/lineardoc/Utils.js
index 239dfd2..aff8d60 100644
--- a/lineardoc/Utils.js
+++ b/lineardoc/Utils.js
@@ -141,9 +141,11 @@
  */
 function isReference( tag ) {
if ( tag.name === 'span'  tag.attributes.typeof === 
'mw:Extension/ref' ) {
+   // See 
https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec#Ref_and_References
return true;
-   } else if ( tag.name === 'span'  tag.attributes.class === 'reference' 
) {
-   // TODO: This is in the tests, but is it correct behaviour?
+   } else if ( tag.name === 'sup'  tag.attributes.class === 'reference' 
) {
+   // See cite_reference_link message of Cite extension
+   // https://www.mediawiki.org/wiki/Extension:Cite
return true;
}
return false;
diff --git a/tests/lineardoc/LinearDoc.test.js 
b/tests/lineardoc/LinearDoc.test.js
index 745fb36..13a1007 100644
--- a/tests/lineardoc/LinearDoc.test.js
+++ b/tests/lineardoc/LinearDoc.test.js
@@ -6,7 +6,7 @@
 QUnit.test( 'LinearDoc tests', function ( assert ) {
var parser, testXhtmlFile, resultXmlFile, resultXhtmlFile, testXhtml, 
resultXml,
resultXhtml, i,
-   numTests = 2;
+   numTests = 3;
QUnit.expect( 2 * numTests );
for ( i = 1; i = numTests; i++ ) {
testXhtmlFile = __dirname + '/data/test' + i + '.xhtml';
diff --git a/tests/lineardoc/data/test3-result.xhtml 
b/tests/lineardoc/data/test3-result.xhtml
new file mode 100644
index 000..d6ac7fa
--- /dev/null
+++ b/tests/lineardoc/data/test3-result.xhtml
@@ -0,0 +1,14 @@
+html
+head
+meta charset=UTF-8 /
+titletest/title
+/head
+body
+ol
+li about=#cite_note-5 id=cite_note-5
+a href=#cite_ref-5 rel=mw:referencedByspan class=mw-linkback-text↑ 
/span/aa href=#cite_ref-5 rel=mw:referencedBy/a
+span 

[MediaWiki-commits] [Gerrit] Lineardoc: Do not skip tags with empty content but with attr... - change (mediawiki...cxserver)

2015-07-02 Thread Santhosh (Code Review)
Santhosh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/63

Change subject: Lineardoc: Do not skip tags with empty content but with 
attributes
..

Lineardoc: Do not skip tags with empty content but with attributes

Those attributes are important and sometimes contain information for
parsoid to work with references or templates.

Added tests to verify lineardoc is not eating up empty tags

Bug: T104539
Change-Id: I4cca925cb35a00fa27fd31380fae90e1c4c2767f
---
M lineardoc/Builder.js
M tests/lineardoc/LinearDoc.test.js
A tests/lineardoc/data/test3-result.xhtml
A tests/lineardoc/data/test3-result.xml
A tests/lineardoc/data/test3.xhtml
5 files changed, 85 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/63/63/1

diff --git a/lineardoc/Builder.js b/lineardoc/Builder.js
index 15f806c..5df6a53 100644
--- a/lineardoc/Builder.js
+++ b/lineardoc/Builder.js
@@ -58,7 +58,11 @@
'Mismatched inline tags: open=' + ( tag  tag.name ) + 
', close=' + tagName
);
}
-   if ( tag.name !== 'span' || !tag.attributes[ 'data-mw' ] ) {
+
+   if ( !Object.keys( tag.attributes).length ) {
+   // if the tag has no attributes, we can safeley assume that
+   // we not check for empy/whitespace only tags. Otherwise, we 
might be skipping
+   // important tags just because it has no content. See Bug 
T104539
return tag;
}
// Check for empty/whitespace-only data span. Replace as inline content
diff --git a/tests/lineardoc/LinearDoc.test.js 
b/tests/lineardoc/LinearDoc.test.js
index 745fb36..13a1007 100644
--- a/tests/lineardoc/LinearDoc.test.js
+++ b/tests/lineardoc/LinearDoc.test.js
@@ -6,7 +6,7 @@
 QUnit.test( 'LinearDoc tests', function ( assert ) {
var parser, testXhtmlFile, resultXmlFile, resultXhtmlFile, testXhtml, 
resultXml,
resultXhtml, i,
-   numTests = 2;
+   numTests = 3;
QUnit.expect( 2 * numTests );
for ( i = 1; i = numTests; i++ ) {
testXhtmlFile = __dirname + '/data/test' + i + '.xhtml';
diff --git a/tests/lineardoc/data/test3-result.xhtml 
b/tests/lineardoc/data/test3-result.xhtml
new file mode 100644
index 000..d6ac7fa
--- /dev/null
+++ b/tests/lineardoc/data/test3-result.xhtml
@@ -0,0 +1,14 @@
+html
+head
+meta charset=UTF-8 /
+titletest/title
+/head
+body
+ol
+li about=#cite_note-5 id=cite_note-5
+a href=#cite_ref-5 rel=mw:referencedByspan class=mw-linkback-text↑ 
/span/aa href=#cite_ref-5 rel=mw:referencedBy/a
+span class=mw-reference-text data-parsoid={} 
id=mw-reference-text-cite_note-5a 
data-parsoid={#34;targetOff#34;:4337,#34;contentOffsets#34;:[4337,4337],#34;dsr#34;:[4248,4338,89,1]}
 
href=http://lhermanto-resepkeluarga.blogspot.com/2008/04/telor-bumbu-bali-bali-seasoning.html;
 rel=mw:ExtLink/a/span
+/li
+/ol
+/body
+/html
diff --git a/tests/lineardoc/data/test3-result.xml 
b/tests/lineardoc/data/test3-result.xml
new file mode 100644
index 000..f0bc5b3
--- /dev/null
+++ b/tests/lineardoc/data/test3-result.xml
@@ -0,0 +1,51 @@
+html
+cxblockspace/
+head
+meta charset=UTF-8 /
+stylecxtextblock { border: solid #88f 1px }
+cxtextchunk { border-right: solid #f88 1px }/style
+cxtextblock
+  cxtextchunk#10;/cxtextchunk
+  cxinlineelement
+meta/
+  /cxinlineelement
+  cxtextchunk#10;/cxtextchunk
+/cxtextblock
+title
+cxtextblock
+  cxtextchunktest/cxtextchunk
+/cxtextblock
+/title
+cxblockspace/
+/head
+cxblockspace/
+body
+cxblockspace/
+ol
+cxblockspace/
+li
+cxtextblock
+  cxtextchunk#10;/cxtextchunk
+  cxtextchunk tags=a:href=#cite_ref-5,rel=mw:referencedBy 
span:class=mw-linkback-text↑ /cxtextchunk
+  cxinlineelement
+a
+cxtextblock
+/cxtextblock
+/a
+  /cxinlineelement
+  cxtextchunk#10;/cxtextchunk
+  cxinlineelement 
tags=span:id=mw-reference-text-cite_note-5,class=mw-reference-text,data-parsoid={}
+a
+cxtextblock
+/cxtextblock
+/a
+  /cxinlineelement
+  cxtextchunk#10;/cxtextchunk
+/cxtextblock
+/li
+cxblockspace/
+/ol
+cxblockspace/
+/body
+cxblockspace/
+/html
diff --git a/tests/lineardoc/data/test3.xhtml b/tests/lineardoc/data/test3.xhtml
new file mode 100644
index 000..03339e0
--- /dev/null
+++ b/tests/lineardoc/data/test3.xhtml
@@ -0,0 +1,14 @@
+html
+head
+meta charset=UTF-8 /
+titletest/title
+/head
+body
+ol
+li about=#cite_note-5 id=cite_note-5
+a href=#cite_ref-5 rel=mw:referencedByspan class=mw-linkback-text↑ 
/span/a
+span id=mw-reference-text-cite_note-5 class=mw-reference-text 
data-parsoid={}a rel=mw:ExtLink 
href=http://lhermanto-resepkeluarga.blogspot.com/2008/04/telor-bumbu-bali-bali-seasoning.html;
 
data-parsoid={quot;targetOffquot;:4337,quot;contentOffsetsquot;:[4337,4337],quot;dsrquot;:[4248,4338,89,1]}/a/span
+/li
+/ol
+/body
+/html

-- 
To view, visit