Santhosh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/124298

Change subject: Move all test HTML into .html files to avoid hard-to-read 
escaped HTML
......................................................................

Move all test HTML into .html files to avoid hard-to-read escaped HTML

Change-Id: I4ee76ef58c5b3ab171269dbbe1a083dbb78382ec
---
M tests/segmentation/CXSegmenter.test.js
M tests/segmentation/SegmentationTests.json
A tests/segmentation/data/result-1.html
A tests/segmentation/data/result-10.html
A tests/segmentation/data/result-11.html
A tests/segmentation/data/result-12.html
A tests/segmentation/data/result-13.html
A tests/segmentation/data/result-14.html
A tests/segmentation/data/result-2.html
A tests/segmentation/data/result-3.html
A tests/segmentation/data/result-4.html
A tests/segmentation/data/result-5.html
A tests/segmentation/data/result-6.html
A tests/segmentation/data/result-7.html
A tests/segmentation/data/result-8.html
A tests/segmentation/data/result-9.html
A tests/segmentation/data/result-debian-1.html
A tests/segmentation/data/test-1.html
A tests/segmentation/data/test-10.html
A tests/segmentation/data/test-11.html
A tests/segmentation/data/test-12.html
A tests/segmentation/data/test-13.html
A tests/segmentation/data/test-14.html
A tests/segmentation/data/test-2.html
A tests/segmentation/data/test-3.html
A tests/segmentation/data/test-4.html
A tests/segmentation/data/test-5.html
A tests/segmentation/data/test-6.html
A tests/segmentation/data/test-7.html
A tests/segmentation/data/test-8.html
A tests/segmentation/data/test-9.html
A tests/segmentation/data/test-debian-1.html
32 files changed, 270 insertions(+), 34 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/98/124298/1

diff --git a/tests/segmentation/CXSegmenter.test.js 
b/tests/segmentation/CXSegmenter.test.js
index 2b3ae2b..4ce33f9 100644
--- a/tests/segmentation/CXSegmenter.test.js
+++ b/tests/segmentation/CXSegmenter.test.js
@@ -1,5 +1,6 @@
 QUnit.module( 'CXSegmenter' );
 
+var fs = require( 'fs' );
 QUnit.test( 'Segmentation tests', function ( assert ) {
        var i, len, lang, test, tests, segmenter, result,
                count = 0,
@@ -13,11 +14,14 @@
                tests = allTests[ lang ];
                for ( i = 0, len = allTests[ lang ].length; i < len; i++ ) {
                        test = tests[ i ];
-                       segmenter = new CX.Segmenter( test.source, lang );
+                       testData = fs.readFileSync( __dirname + '/data/' + 
test.source, 'utf8' );
+                       segmenter = new CX.Segmenter( testData, lang );
                        segmenter.segment();
                        result = segmenter.getSegmentedContent();
                        result = result.replace( /(\r\n|\n|\t|\r)/gm, '' );
-                       assert.strictEqual( result, test.result, test.desc || 
'' );
+                       expectedResultData = fs.readFileSync( __dirname + 
'/data/' + test.result, 'utf8' );
+                       expectedResultData = expectedResultData.replace( 
/(\r\n|\n|\t|\r)/gm, '' );
+                       assert.strictEqual( result, expectedResultData, 
test.desc || '' );
                }
        }
 } );
diff --git a/tests/segmentation/SegmentationTests.json 
b/tests/segmentation/SegmentationTests.json
index da8d73f..205bf1f 100644
--- a/tests/segmentation/SegmentationTests.json
+++ b/tests/segmentation/SegmentationTests.json
@@ -1,72 +1,81 @@
 {
        "en": [
                {
-                       "source": "<p>A simple paragraph.</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">A simple paragraph.</span></p>"
+                       "desc": "Simple paragraph test",
+                       "source": "test-1.html",
+                       "result": "result-1.html"
                },
                {
-                       "source": "<p>Hello! Mr. D. John, How are you?</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Hello! </span><span class=\"cx-segment\" 
data-segmentid=\"2\">Mr. D. John, How are you?</span></p>"
+                       "desc": "Exclamation, punctuation test",
+                       "source": "test-2.html",
+                       "result": "result-2.html"
                },
                {
-                       "source": "<p>This is first sentence. This is second 
sentence.</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">This is first sentence. </span><span class=\"cx-segment\" 
data-segmentid=\"2\">This is second sentence.</span></p>"
+                       "desc": "Multiple sentences",
+                       "source": "test-3.html",
+                       "result": "result-3.html"
                },
                {
-                       "source": "<p>This is first sentence. This is second 
sentence</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">This is first sentence. </span><span class=\"cx-segment\" 
data-segmentid=\"2\">This is second sentence</span></p>"
+                       "desc": "Multiple sections - a div and paragraph",
+                       "source": "test-4.html",
+                       "result": "result-4.html"
                },
                {
-                       "source": "<div>Some div</div><p>This is first 
sentence. This is second sentence</p>",
-                       "result": "<div id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Some div</span></div><p id=\"2\"><span 
class=\"cx-segment\" data-segmentid=\"3\">This is first sentence. </span><span 
class=\"cx-segment\" data-segmentid=\"4\">This is second sentence</span></p>"
+                       "desc": "Sentence starting with a link",
+                       "source": "test-5.html",
+                       "result": "result-5.html"
                },
                {
-                       "source": "<p><a href=\"#\">Hydrogen</a> is a gas</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\"><a class=\"cx-link\" data-linkid=\"2\" 
href=\"#\">Hydrogen</a> is a gas</span></p>"
+                       "desc": "Sentence starting with a link and ending with 
a link",
+                       "source": "test-6.html",
+                       "result": "result-6.html"
                },
                {
-                       "source": "<p><a href=\"#\">Hydrogen</a> is a <a 
href=\"#\">gas</a></p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\"><a class=\"cx-link\" data-linkid=\"2\" 
href=\"#\">Hydrogen</a> is a <a class=\"cx-link\" data-linkid=\"3\" 
href=\"#\">gas</a></span></p>"
-               },
-               {
-                       "source": "<figure><a href=\"#\"><img 
src=\"img.png\"></a><figcaption>Figure caption</figcaption></figure>",
-                       "result": "<figure id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\"><img 
src=\"img.png\"></img></a></span><figcaption id=\"3\"><span 
class=\"cx-segment\" data-segmentid=\"4\">Figure 
caption</span></figcaption></figure>"
+                       "desc": "Figure",
+                       "source": "test-7.html",
+                       "result": "result-7.html"
                },
                {
                        "desc": "References can appear after period without 
space. Example: Hydrogen is a gas.[1] It is ...",
-                       "source": "<p>Sentence one.<span class=\"reference\"><a 
href=\"#\">reference</a></span> Starts with reference</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Sentence one.<span class=\"reference\"><a 
class=\"cx-link\" data-linkid=\"2\" href=\"#\">reference</a></span></span><span 
class=\"cx-segment\" data-segmentid=\"3\"> Starts with reference</span></p>"
+                       "source": "test-8.html",
+                       "result": "result-8.html"
                },
                {
                        "desc": "References can appear after period without 
space, repeated. Example: Hydrogen is a gas.[1][2][3] It is ...",
-                       "source": "<p>Sentence one.<span class=\"reference\"><a 
href=\"#\">1</a></span><span class=\"reference\"><a 
href=\"#\">2</a></span><span class=\"reference\"><a href=\"#\">3</a></span> 
Starts with reference</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Sentence one.<span class=\"reference\"><a 
class=\"cx-link\" data-linkid=\"2\" href=\"#\">1</a></span><span 
class=\"reference\"><a class=\"cx-link\" data-linkid=\"3\" 
href=\"#\">2</a></span><span class=\"reference\"><a class=\"cx-link\" 
data-linkid=\"4\" href=\"#\">3</a></span></span><span class=\"cx-segment\" 
data-segmentid=\"5\"> Starts with reference</span></p>"
+                       "source": "test-9.html",
+                       "result": "result-9.html"
                },
                {
                        "desc": "References can appear inside a sentence. 
Example: Hydrogen is a gas[1] and it is ...",
-                       "source": "<p>Sentence one<span class=\"reference\"><a 
href=\"#\">1</a></span> and rest of sentence</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Sentence one<span class=\"reference\"><a class=\"cx-link\" 
data-linkid=\"2\" href=\"#\">1</a></span> and rest of sentence</span></p>"
+                       "source": "test-10.html",
+                       "result": "result-10.html"
                },
                {
                        "desc": "References can appear inside a sentence. 
Example:Hydrogen's atomic number is one. Hydrogen is a gas[1] and it is ..... 
Test the flags are reset properly",
-                       "source": "<p>Hydrogen's atomic number is one.<span 
class=\"reference\"><a href=\"#\">1</a></span> Hydrogen is a gas<span 
class=\"reference\"><a href=\"#\">2</a></span> and it is</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Hydrogen's atomic number is one.<span 
class=\"reference\"><a class=\"cx-link\" data-linkid=\"2\" 
href=\"#\">1</a></span></span><span class=\"cx-segment\" data-segmentid=\"3\"> 
Hydrogen is a gas<span class=\"reference\"><a class=\"cx-link\" 
data-linkid=\"4\" href=\"#\">2</a></span> and it is</span></p>"
+                       "source": "test-11.html",
+                       "result": "result-11.html"
                },
                {
                        "desc": "References can appear repeated inside a 
sentence. Example: Hydrogen is a gas[1][2][3] and it is ...",
-                       "source": "<p>Sentence one<span class=\"reference\"><a 
href=\"#\">1</a></span><span class=\"reference\"><a 
href=\"#\">2</a></span><span class=\"reference\"><a href=\"#\">3</a></span> and 
rest of sentence</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Sentence one<span class=\"reference\"><a class=\"cx-link\" 
data-linkid=\"2\" href=\"#\">1</a></span><span class=\"reference\"><a 
class=\"cx-link\" data-linkid=\"3\" href=\"#\">2</a></span><span 
class=\"reference\"><a class=\"cx-link\" data-linkid=\"4\" 
href=\"#\">3</a></span> and rest of sentence</span></p>"
+                       "source": "test-12.html",
+                       "result": "result-12.html"
                },
                {
                        "desc": "References can appear after period and space. 
Example: Hydrogen is a gas. [1] It is .... In this case we dont have any choice 
than considering [1] as part of second sentence",
-                       "source": "<p>Sentence one. <span 
class=\"reference\"><a href=\"#\">reference</a></span> Starts with 
reference</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">Sentence one. </span><span class=\"cx-segment\" 
data-segmentid=\"2\"><span class=\"reference\"><a class=\"cx-link\" 
data-linkid=\"3\" href=\"#\">reference</a></span> Starts with 
reference</span></p>"
+                       "source": "test-13.html",
+                       "result": "result-13.html"
+               },
+               {
+                       "desc": "Paragraph from Debian article- repeating 
references",
+                       "source": "test-debian-1.html",
+                       "result": "result-debian-1.html"
                }
        ],
        "hi": [
                {
-                       "source": "<p>माउज़र पिस्तौल (<a 
href=\"#\">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध 
स्वचालित पिस्तौल है। इस पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र बन्धुओं ने 
सन् 1895 में तैयार किया था।</p>",
-                       "result": "<p id=\"0\"><span class=\"cx-segment\" 
data-segmentid=\"1\">माउज़र पिस्तौल (<a class=\"cx-link\" data-linkid=\"2\" 
href=\"#\">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध 
स्वचालित पिस्तौल है। </span><span class=\"cx-segment\" data-segmentid=\"3\">इस 
पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र बन्धुओं ने सन् 1895 में तैयार किया 
था।</span></p>"
+                       "desc": "Hindi segmentation - basic test",
+                       "source": "test-14.html",
+                       "result": "result-14.html"
                }
        ]
 }
diff --git a/tests/segmentation/data/result-1.html 
b/tests/segmentation/data/result-1.html
new file mode 100644
index 0000000..469058c
--- /dev/null
+++ b/tests/segmentation/data/result-1.html
@@ -0,0 +1,8 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">A simple paragraph.</span>
+</p>
+
+
+
+
+
diff --git a/tests/segmentation/data/result-10.html 
b/tests/segmentation/data/result-10.html
new file mode 100644
index 0000000..96b3225
--- /dev/null
+++ b/tests/segmentation/data/result-10.html
@@ -0,0 +1,7 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Sentence one
+               <span class="reference">
+                       <a class="cx-link" data-linkid="2" href="#">1</a>
+               </span>and rest of sentence</span>
+</p>
+
diff --git a/tests/segmentation/data/result-11.html 
b/tests/segmentation/data/result-11.html
new file mode 100644
index 0000000..969c3c1
--- /dev/null
+++ b/tests/segmentation/data/result-11.html
@@ -0,0 +1,12 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">
+               Hydrogen's atomic number is one.<span class="reference">
+                       <a class="cx-link" data-linkid="2" href="#">1</a>
+               </span>
+       </span>
+       <span class="cx-segment" data-segmentid="3">Hydrogen is a gas
+               <span class="reference">
+                       <a class="cx-link" data-linkid="4" href="#">2</a>
+               </span>and it is
+       </span>
+</p>
diff --git a/tests/segmentation/data/result-12.html 
b/tests/segmentation/data/result-12.html
new file mode 100644
index 0000000..d9a1844
--- /dev/null
+++ b/tests/segmentation/data/result-12.html
@@ -0,0 +1,12 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Sentence one <span 
class="reference">
+                       <a class="cx-link" data-linkid="2" href="#">1</a>
+               </span>
+               <span class="reference">
+                       <a class="cx-link" data-linkid="3" href="#">2</a>
+               </span>
+               <span class="reference">
+                       <a class="cx-link" data-linkid="4" href="#">3</a>
+               </span> and rest of sentence</span>
+</p>
+
diff --git a/tests/segmentation/data/result-13.html 
b/tests/segmentation/data/result-13.html
new file mode 100644
index 0000000..43e0003
--- /dev/null
+++ b/tests/segmentation/data/result-13.html
@@ -0,0 +1,8 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Sentence one. </span>
+       <span class="cx-segment" data-segmentid="2">
+               <span class="reference">
+                       <a class="cx-link" data-linkid="3" 
href="#">reference</a>
+               </span> Starts with reference</span>
+</p>
+
diff --git a/tests/segmentation/data/result-14.html 
b/tests/segmentation/data/result-14.html
new file mode 100644
index 0000000..6e4f0fe
--- /dev/null
+++ b/tests/segmentation/data/result-14.html
@@ -0,0 +1,5 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">माउज़र पिस्तौल (
+               <a class="cx-link" data-linkid="2" href="#">अंग्रेजी</a>: 
Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध स्वचालित पिस्तौल है। </span>
+       <span class="cx-segment" data-segmentid="3">इस पिस्तौल का डिजाइन जर्मनी 
निवासी दो माउज़र बन्धुओं ने सन् 1895 में तैयार किया था।</span>
+</p>
diff --git a/tests/segmentation/data/result-2.html 
b/tests/segmentation/data/result-2.html
new file mode 100644
index 0000000..9c6b813
--- /dev/null
+++ b/tests/segmentation/data/result-2.html
@@ -0,0 +1,6 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Hello! </span>
+       <span class="cx-segment" data-segmentid="2">Mr. D. John, How are 
you?</span>
+</p>
+
+
diff --git a/tests/segmentation/data/result-3.html 
b/tests/segmentation/data/result-3.html
new file mode 100644
index 0000000..d0cc742
--- /dev/null
+++ b/tests/segmentation/data/result-3.html
@@ -0,0 +1,5 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">This is first sentence. 
</span>
+       <span class="cx-segment" data-segmentid="2">This is second 
sentence.</span>
+</p>
+
diff --git a/tests/segmentation/data/result-4.html 
b/tests/segmentation/data/result-4.html
new file mode 100644
index 0000000..ae18988
--- /dev/null
+++ b/tests/segmentation/data/result-4.html
@@ -0,0 +1,8 @@
+<div id="0">
+       <span class="cx-segment" data-segmentid="1">Some div</span>
+</div>
+<p id="2">
+       <span class="cx-segment" data-segmentid="3">This is first sentence. 
</span>
+       <span class="cx-segment" data-segmentid="4">This is second 
sentence</span>
+</p>
+
diff --git a/tests/segmentation/data/result-5.html 
b/tests/segmentation/data/result-5.html
new file mode 100644
index 0000000..72bde12
--- /dev/null
+++ b/tests/segmentation/data/result-5.html
@@ -0,0 +1,5 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">
+               <a class="cx-link" data-linkid="2" href="#">Hydrogen</a>is a 
gas</span>
+</p>
+
diff --git a/tests/segmentation/data/result-6.html 
b/tests/segmentation/data/result-6.html
new file mode 100644
index 0000000..c53e527
--- /dev/null
+++ b/tests/segmentation/data/result-6.html
@@ -0,0 +1,7 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">
+               <a class="cx-link" data-linkid="2" href="#">Hydrogen</a>is a
+               <a class="cx-link" data-linkid="3" href="#">gas</a>
+       </span>
+</p>
+
diff --git a/tests/segmentation/data/result-7.html 
b/tests/segmentation/data/result-7.html
new file mode 100644
index 0000000..0fdc527
--- /dev/null
+++ b/tests/segmentation/data/result-7.html
@@ -0,0 +1,11 @@
+<figure id="0">
+       <span class="cx-segment" data-segmentid="1">
+               <a class="cx-link" data-linkid="2" href="#">
+                       <img src="img.png"></img>
+               </a>
+       </span>
+       <figcaption id="3">
+               <span class="cx-segment" data-segmentid="4">Figure 
caption</span>
+       </figcaption>
+</figure>
+
diff --git a/tests/segmentation/data/result-8.html 
b/tests/segmentation/data/result-8.html
new file mode 100644
index 0000000..4832ee4
--- /dev/null
+++ b/tests/segmentation/data/result-8.html
@@ -0,0 +1,9 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Sentence one.
+               <span class="reference">
+                       <a class="cx-link" data-linkid="2" 
href="#">reference</a>
+               </span>
+       </span>
+       <span class="cx-segment" data-segmentid="3"> Starts with 
reference</span>
+</p>
+
diff --git a/tests/segmentation/data/result-9.html 
b/tests/segmentation/data/result-9.html
new file mode 100644
index 0000000..87d2f3b
--- /dev/null
+++ b/tests/segmentation/data/result-9.html
@@ -0,0 +1,15 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Sentence one.
+               <span class="reference">
+                       <a class="cx-link" data-linkid="2" href="#">1</a>
+               </span>
+               <span class="reference">
+                       <a class="cx-link" data-linkid="3" href="#">2</a>
+               </span>
+               <span class="reference">
+                       <a class="cx-link" data-linkid="4" href="#">3</a>
+               </span>
+       </span>
+       <span class="cx-segment" data-segmentid="5"> Starts with 
reference</span>
+</p>
+
diff --git a/tests/segmentation/data/result-debian-1.html 
b/tests/segmentation/data/result-debian-1.html
new file mode 100644
index 0000000..57daee5
--- /dev/null
+++ b/tests/segmentation/data/result-debian-1.html
@@ -0,0 +1,25 @@
+<p id="0">
+       <span class="cx-segment" data-segmentid="1">Debian offers
+               <i>stable</i>and
+               <i>testing</i>CD images specifically built for
+               <a class="cx-link" data-linkid="2" rel="mw:WikiLink" 
href="./GNOME">GNOME</a>(the default),
+               <a class="cx-link" data-linkid="3" rel="mw:WikiLink" 
href="./KDE_Plasma_Workspaces">KDE Plasma Workspaces</a>,
+               <a class="cx-link" data-linkid="4" rel="mw:WikiLink" 
href="./Xfce">Xfce</a>and
+               <a class="cx-link" data-linkid="5" rel="mw:WikiLink" 
href="./LXDE">LXDE</a>.
+               <span about="#mwt72" class="reference">
+                       <a class="cx-link" data-linkid="6" 
href="#cite_note-25">[25]</a>
+               </span>
+               <span about="#mwt72" class="reference">
+                       <a class="cx-link" data-linkid="7" 
href="#cite_note-25">[26]</a>
+               </span>
+       </span>
+       <span class="cx-segment" data-segmentid="8"> Less common
+               <a class="cx-link" data-linkid="9" rel="mw:WikiLink" 
href="./Window_manager">window managers</a>such as
+               <a class="cx-link" data-linkid="10" rel="mw:WikiLink" 
href="./Enlightenment_(window_manager)">Enlightenment</a>,
+               <a class="cx-link" data-linkid="11" rel="mw:WikiLink" 
href="./Openbox">Openbox</a>,
+               <a class="cx-link" data-linkid="12" rel="mw:WikiLink" 
href="./Fluxbox">Fluxbox</a>,
+               <a class="cx-link" data-linkid="13" rel="mw:WikiLink" 
href="./GNUstep">GNUstep</a>,
+               <a class="cx-link" data-linkid="14" rel="mw:WikiLink" 
href="./IceWM">IceWM</a>,
+               <a class="cx-link" data-linkid="15" rel="mw:WikiLink" 
href="./Window_Maker">Window Maker</a>and others can also be installed.</span>
+</p>
+
diff --git a/tests/segmentation/data/test-1.html 
b/tests/segmentation/data/test-1.html
new file mode 100644
index 0000000..84dcde1
--- /dev/null
+++ b/tests/segmentation/data/test-1.html
@@ -0,0 +1 @@
+<p>A simple paragraph.</p>
diff --git a/tests/segmentation/data/test-10.html 
b/tests/segmentation/data/test-10.html
new file mode 100644
index 0000000..5200c64
--- /dev/null
+++ b/tests/segmentation/data/test-10.html
@@ -0,0 +1,5 @@
+<p>Sentence one
+       <span class="reference">
+               <a href="#">1</a>
+       </span>and rest of sentence</p>
+
diff --git a/tests/segmentation/data/test-11.html 
b/tests/segmentation/data/test-11.html
new file mode 100644
index 0000000..46b062a
--- /dev/null
+++ b/tests/segmentation/data/test-11.html
@@ -0,0 +1,9 @@
+<p>
+       Hydrogen's atomic number is one.<span class="reference">
+               <a href="#">1</a>
+       </span>Hydrogen is a gas<span class="reference">
+               <a href="#">2</a>
+       </span>
+       and it is
+</p>
+
diff --git a/tests/segmentation/data/test-12.html 
b/tests/segmentation/data/test-12.html
new file mode 100644
index 0000000..510ee41
--- /dev/null
+++ b/tests/segmentation/data/test-12.html
@@ -0,0 +1,11 @@
+<p>Sentence one <span class="reference">
+               <a href="#">1</a>
+       </span>
+       <span class="reference">
+               <a href="#">2</a>
+       </span>
+       <span class="reference">
+               <a href="#">3</a>
+       </span> and rest of sentence
+</p>
+
diff --git a/tests/segmentation/data/test-13.html 
b/tests/segmentation/data/test-13.html
new file mode 100644
index 0000000..413b752
--- /dev/null
+++ b/tests/segmentation/data/test-13.html
@@ -0,0 +1 @@
+<p>Sentence one. <span class="reference"><a href="#">reference</a></span> 
Starts with reference</p>
diff --git a/tests/segmentation/data/test-14.html 
b/tests/segmentation/data/test-14.html
new file mode 100644
index 0000000..171fd62
--- /dev/null
+++ b/tests/segmentation/data/test-14.html
@@ -0,0 +1,3 @@
+<p>माउज़र पिस्तौल (
+       <a href="#">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक 
अर्द्ध स्वचालित पिस्तौल है। इस पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र 
बन्धुओं ने सन् 1895 में तैयार किया था।</p>
+
diff --git a/tests/segmentation/data/test-2.html 
b/tests/segmentation/data/test-2.html
new file mode 100644
index 0000000..b2c8369
--- /dev/null
+++ b/tests/segmentation/data/test-2.html
@@ -0,0 +1 @@
+<p>Hello! Mr. D. John, How are you?</p>
diff --git a/tests/segmentation/data/test-3.html 
b/tests/segmentation/data/test-3.html
new file mode 100644
index 0000000..5fa6ef3
--- /dev/null
+++ b/tests/segmentation/data/test-3.html
@@ -0,0 +1 @@
+<p>This is first sentence. This is second sentence.</p>
diff --git a/tests/segmentation/data/test-4.html 
b/tests/segmentation/data/test-4.html
new file mode 100644
index 0000000..d7db0ef
--- /dev/null
+++ b/tests/segmentation/data/test-4.html
@@ -0,0 +1,2 @@
+<div>Some div</div>
+<p>This is first sentence. This is second sentence</p>
diff --git a/tests/segmentation/data/test-5.html 
b/tests/segmentation/data/test-5.html
new file mode 100644
index 0000000..870717c
--- /dev/null
+++ b/tests/segmentation/data/test-5.html
@@ -0,0 +1,3 @@
+<p>
+       <a href="#">Hydrogen</a>is a gas</p>
+
diff --git a/tests/segmentation/data/test-6.html 
b/tests/segmentation/data/test-6.html
new file mode 100644
index 0000000..95aab00
--- /dev/null
+++ b/tests/segmentation/data/test-6.html
@@ -0,0 +1,5 @@
+<p>
+       <a href="#">Hydrogen</a>is a
+       <a href="#">gas</a>
+</p>
+
diff --git a/tests/segmentation/data/test-7.html 
b/tests/segmentation/data/test-7.html
new file mode 100644
index 0000000..5c78e2d
--- /dev/null
+++ b/tests/segmentation/data/test-7.html
@@ -0,0 +1,9 @@
+<figure>
+       <a href="#">
+               <img src="img.png">
+       </a>
+       <figcaption>Figure caption</figcaption>
+</figure>
+
+
+
diff --git a/tests/segmentation/data/test-8.html 
b/tests/segmentation/data/test-8.html
new file mode 100644
index 0000000..06f0e69
--- /dev/null
+++ b/tests/segmentation/data/test-8.html
@@ -0,0 +1,2 @@
+<p>Sentence one.<span class="reference"><a href="#">reference</a></span> 
Starts with reference</p>
+
diff --git a/tests/segmentation/data/test-9.html 
b/tests/segmentation/data/test-9.html
new file mode 100644
index 0000000..c4a22d2
--- /dev/null
+++ b/tests/segmentation/data/test-9.html
@@ -0,0 +1,10 @@
+<p>Sentence one.<span class="reference">
+               <a href="#">1</a>
+       </span>
+       <span class="reference">
+               <a href="#">2</a>
+       </span>
+       <span class="reference">
+               <a href="#">3</a>
+       </span> Starts with reference</p>
+
diff --git a/tests/segmentation/data/test-debian-1.html 
b/tests/segmentation/data/test-debian-1.html
new file mode 100644
index 0000000..ccabacd
--- /dev/null
+++ b/tests/segmentation/data/test-debian-1.html
@@ -0,0 +1,17 @@
+<p>
+       Debian offers
+       <i>stable</i>and
+       <i>testing</i>CD images specifically built for
+       <a rel="mw:WikiLink" href="./GNOME">GNOME</a>(the default),
+       <a rel="mw:WikiLink" href="./KDE_Plasma_Workspaces">KDE Plasma 
Workspaces</a>,
+       <a rel="mw:WikiLink" href="./Xfce">Xfce</a>and
+       <a rel="mw:WikiLink" href="./LXDE">LXDE</a>.<span about="#mwt72" 
class="reference">
+               <a href="#cite_note-25">[25]</a></span>
+       <span about="#mwt72" class="reference"><a 
href="#cite_note-25">[26]</a></span> Less common
+       <a rel="mw:WikiLink" href="./Window_manager">window managers</a>such as
+       <a rel="mw:WikiLink" 
href="./Enlightenment_(window_manager)">Enlightenment</a>,
+       <a rel="mw:WikiLink" href="./Openbox">Openbox</a>,
+       <a rel="mw:WikiLink" href="./Fluxbox">Fluxbox</a>,
+       <a rel="mw:WikiLink" href="./GNUstep">GNUstep</a>,
+       <a rel="mw:WikiLink" href="./IceWM">IceWM</a>,
+       <a rel="mw:WikiLink" href="./Window_Maker">Window Maker</a>and others 
can also be installed.</p>

-- 
To view, visit https://gerrit.wikimedia.org/r/124298
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4ee76ef58c5b3ab171269dbbe1a083dbb78382ec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to