Santhosh has uploaded a new change for review. https://gerrit.wikimedia.org/r/124298
Change subject: Move all test htmls to html files to avoid difficult to read escaped html ...................................................................... Move all test htmls to html files to avoid difficult to read escaped html Change-Id: I4ee76ef58c5b3ab171269dbbe1a083dbb78382ec --- M tests/segmentation/CXSegmenter.test.js M tests/segmentation/SegmentationTests.json A tests/segmentation/data/result-1.html A tests/segmentation/data/result-10.html A tests/segmentation/data/result-11.html A tests/segmentation/data/result-12.html A tests/segmentation/data/result-13.html A tests/segmentation/data/result-14.html A tests/segmentation/data/result-2.html A tests/segmentation/data/result-3.html A tests/segmentation/data/result-4.html A tests/segmentation/data/result-5.html A tests/segmentation/data/result-6.html A tests/segmentation/data/result-7.html A tests/segmentation/data/result-8.html A tests/segmentation/data/result-9.html A tests/segmentation/data/result-debian-1.html A tests/segmentation/data/test-1.html A tests/segmentation/data/test-10.html A tests/segmentation/data/test-11.html A tests/segmentation/data/test-12.html A tests/segmentation/data/test-13.html A tests/segmentation/data/test-14.html A tests/segmentation/data/test-2.html A tests/segmentation/data/test-3.html A tests/segmentation/data/test-4.html A tests/segmentation/data/test-5.html A tests/segmentation/data/test-6.html A tests/segmentation/data/test-7.html A tests/segmentation/data/test-8.html A tests/segmentation/data/test-9.html A tests/segmentation/data/test-debian-1.html 32 files changed, 270 insertions(+), 34 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver refs/changes/98/124298/1 diff --git a/tests/segmentation/CXSegmenter.test.js b/tests/segmentation/CXSegmenter.test.js index 2b3ae2b..4ce33f9 100644 --- a/tests/segmentation/CXSegmenter.test.js +++ b/tests/segmentation/CXSegmenter.test.js @@ -1,5 +1,6 @@ QUnit.module( 'CXSegmenter' ); +var fs = require( 'fs' ); QUnit.test( 'Segmentation tests', function ( assert ) { var i, len, lang, test, tests, segmenter, result, count = 0, @@ -13,11 +14,14 @@ tests = allTests[ lang ]; for ( i = 0, len = allTests[ lang ].length; i < len; i++ ) { test = tests[ i ]; - segmenter = new CX.Segmenter( test.source, lang ); + testData = fs.readFileSync( __dirname + '/data/' + test.source, 'utf8' ); + segmenter = new CX.Segmenter( testData, lang ); segmenter.segment(); result = segmenter.getSegmentedContent(); result = result.replace( /(\r\n|\n|\t|\r)/gm, '' ); - assert.strictEqual( result, test.result, test.desc || '' ); + expectedResultData = fs.readFileSync( __dirname + '/data/' + test.result, 'utf8' ); + expectedResultData = expectedResultData.replace( /(\r\n|\n|\t|\r)/gm, '' ); + assert.strictEqual( result, expectedResultData, test.desc || '' ); } } } ); diff --git a/tests/segmentation/SegmentationTests.json b/tests/segmentation/SegmentationTests.json index da8d73f..205bf1f 100644 --- a/tests/segmentation/SegmentationTests.json +++ b/tests/segmentation/SegmentationTests.json @@ -1,72 +1,81 @@ { "en": [ { - "source": "<p>A simple paragraph.</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">A simple paragraph.</span></p>" + "desc": "Simple paragraph test", + "source": "test-1.html", + "result": "result-1.html" }, { - "source": "<p>Hello! Mr. D. John, How are you?</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Hello! </span><span class=\"cx-segment\" data-segmentid=\"2\">Mr. D. John, How are you?</span></p>" + "desc": "Exclamation, punctuation test", + "source": "test-2.html", + "result": "result-2.html" }, { - "source": "<p>This is first sentence. This is second sentence.</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">This is first sentence. </span><span class=\"cx-segment\" data-segmentid=\"2\">This is second sentence.</span></p>" + "desc": "Multiple sentences", + "source": "test-3.html", + "result": "result-3.html" }, { - "source": "<p>This is first sentence. This is second sentence</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">This is first sentence. </span><span class=\"cx-segment\" data-segmentid=\"2\">This is second sentence</span></p>" + "desc": "Multiple sections - a div and paragraph", + "source": "test-4.html", + "result": "result-4.html" }, { - "source": "<div>Some div</div><p>This is first sentence. This is second sentence</p>", - "result": "<div id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Some div</span></div><p id=\"2\"><span class=\"cx-segment\" data-segmentid=\"3\">This is first sentence. </span><span class=\"cx-segment\" data-segmentid=\"4\">This is second sentence</span></p>" + "desc": "Sentence starting with a link", + "source": "test-5.html", + "result": "result-5.html" }, { - "source": "<p><a href=\"#\">Hydrogen</a> is a gas</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">Hydrogen</a> is a gas</span></p>" + "desc": "Sentence starting with a link and ending with a link", + "source": "test-6.html", + "result": "result-6.html" }, { - "source": "<p><a href=\"#\">Hydrogen</a> is a <a href=\"#\">gas</a></p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">Hydrogen</a> is a <a class=\"cx-link\" data-linkid=\"3\" href=\"#\">gas</a></span></p>" - }, - { - "source": "<figure><a href=\"#\"><img src=\"img.png\"></a><figcaption>Figure caption</figcaption></figure>", - "result": "<figure id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\"><img src=\"img.png\"></img></a></span><figcaption id=\"3\"><span class=\"cx-segment\" data-segmentid=\"4\">Figure caption</span></figcaption></figure>" + "desc": "Figure", + "source": "test-7.html", + "result": "result-7.html" }, { "desc": "References can appear after period without space. Example: Hydrogen is a gas.[1] It is ...", - "source": "<p>Sentence one.<span class=\"reference\"><a href=\"#\">reference</a></span> Starts with reference</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Sentence one.<span class=\"reference\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">reference</a></span></span><span class=\"cx-segment\" data-segmentid=\"3\"> Starts with reference</span></p>" + "source": "test-8.html", + "result": "result-8.html" }, { "desc": "References can appear after period without space, repeated. Example: Hydrogen is a gas.[1][2][3] It is ...", - "source": "<p>Sentence one.<span class=\"reference\"><a href=\"#\">1</a></span><span class=\"reference\"><a href=\"#\">2</a></span><span class=\"reference\"><a href=\"#\">3</a></span> Starts with reference</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Sentence one.<span class=\"reference\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">1</a></span><span class=\"reference\"><a class=\"cx-link\" data-linkid=\"3\" href=\"#\">2</a></span><span class=\"reference\"><a class=\"cx-link\" data-linkid=\"4\" href=\"#\">3</a></span></span><span class=\"cx-segment\" data-segmentid=\"5\"> Starts with reference</span></p>" + "source": "test-9.html", + "result": "result-9.html" }, { "desc": "References can appear inside a sentence. Example: Hydrogen is a gas[1] and it is ...", - "source": "<p>Sentence one<span class=\"reference\"><a href=\"#\">1</a></span> and rest of sentence</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Sentence one<span class=\"reference\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">1</a></span> and rest of sentence</span></p>" + "source": "test-10.html", + "result": "result-10.html" }, { "desc": "References can appear inside a sentence. Example:Hydrogen's atomic number is one. Hydrogen is a gas[1] and it is ..... Test the flags are reset properly", - "source": "<p>Hydrogen's atomic number is one.<span class=\"reference\"><a href=\"#\">1</a></span> Hydrogen is a gas<span class=\"reference\"><a href=\"#\">2</a></span> and it is</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Hydrogen's atomic number is one.<span class=\"reference\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">1</a></span></span><span class=\"cx-segment\" data-segmentid=\"3\"> Hydrogen is a gas<span class=\"reference\"><a class=\"cx-link\" data-linkid=\"4\" href=\"#\">2</a></span> and it is</span></p>" + "source": "test-11.html", + "result": "result-11.html" }, { "desc": "References can appear repeated inside a sentence. Example: Hydrogen is a gas[1][2][3] and it is ...", - "source": "<p>Sentence one<span class=\"reference\"><a href=\"#\">1</a></span><span class=\"reference\"><a href=\"#\">2</a></span><span class=\"reference\"><a href=\"#\">3</a></span> and rest of sentence</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Sentence one<span class=\"reference\"><a class=\"cx-link\" data-linkid=\"2\" href=\"#\">1</a></span><span class=\"reference\"><a class=\"cx-link\" data-linkid=\"3\" href=\"#\">2</a></span><span class=\"reference\"><a class=\"cx-link\" data-linkid=\"4\" href=\"#\">3</a></span> and rest of sentence</span></p>" + "source": "test-12.html", + "result": "result-12.html" }, { "desc": "References can appear after period and space. Example: Hydrogen is a gas. [1] It is .... In this case we dont have any choice than considering [1] as part of second sentence", - "source": "<p>Sentence one. <span class=\"reference\"><a href=\"#\">reference</a></span> Starts with reference</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">Sentence one. </span><span class=\"cx-segment\" data-segmentid=\"2\"><span class=\"reference\"><a class=\"cx-link\" data-linkid=\"3\" href=\"#\">reference</a></span> Starts with reference</span></p>" + "source": "test-13.html", + "result": "result-13.html" + }, + { + "desc": "Paragraph from Debian article- repeating references", + "source": "test-debian-1.html", + "result": "result-debian-1.html" } ], "hi": [ { - "source": "<p>माउज़र पिस्तौल (<a href=\"#\">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध स्वचालित पिस्तौल है। इस पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र बन्धुओं ने सन् 1895 में तैयार किया था।</p>", - "result": "<p id=\"0\"><span class=\"cx-segment\" data-segmentid=\"1\">माउज़र पिस्तौल (<a class=\"cx-link\" data-linkid=\"2\" href=\"#\">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध स्वचालित पिस्तौल है। </span><span class=\"cx-segment\" data-segmentid=\"3\">इस पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र बन्धुओं ने सन् 1895 में तैयार किया था।</span></p>" + "desc": "Hindi segmentation - basic test", + "source": "test-14.html", + "result": "result-14.html" } ] } diff --git a/tests/segmentation/data/result-1.html b/tests/segmentation/data/result-1.html new file mode 100644 index 0000000..469058c --- /dev/null +++ b/tests/segmentation/data/result-1.html @@ -0,0 +1,8 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">A simple paragraph.</span> +</p> + + + + + diff --git a/tests/segmentation/data/result-10.html b/tests/segmentation/data/result-10.html new file mode 100644 index 0000000..96b3225 --- /dev/null +++ b/tests/segmentation/data/result-10.html @@ -0,0 +1,7 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Sentence one + <span class="reference"> + <a class="cx-link" data-linkid="2" href="#">1</a> + </span>and rest of sentence</span> +</p> + diff --git a/tests/segmentation/data/result-11.html b/tests/segmentation/data/result-11.html new file mode 100644 index 0000000..969c3c1 --- /dev/null +++ b/tests/segmentation/data/result-11.html @@ -0,0 +1,12 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1"> + Hydrogen's atomic number is one.<span class="reference"> + <a class="cx-link" data-linkid="2" href="#">1</a> + </span> + </span> + <span class="cx-segment" data-segmentid="3">Hydrogen is a gas + <span class="reference"> + <a class="cx-link" data-linkid="4" href="#">2</a> + </span>and it is + </span> +</p> diff --git a/tests/segmentation/data/result-12.html b/tests/segmentation/data/result-12.html new file mode 100644 index 0000000..d9a1844 --- /dev/null +++ b/tests/segmentation/data/result-12.html @@ -0,0 +1,12 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Sentence one <span class="reference"> + <a class="cx-link" data-linkid="2" href="#">1</a> + </span> + <span class="reference"> + <a class="cx-link" data-linkid="3" href="#">2</a> + </span> + <span class="reference"> + <a class="cx-link" data-linkid="4" href="#">3</a> + </span> and rest of sentence</span> +</p> + diff --git a/tests/segmentation/data/result-13.html b/tests/segmentation/data/result-13.html new file mode 100644 index 0000000..43e0003 --- /dev/null +++ b/tests/segmentation/data/result-13.html @@ -0,0 +1,8 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Sentence one. </span> + <span class="cx-segment" data-segmentid="2"> + <span class="reference"> + <a class="cx-link" data-linkid="3" href="#">reference</a> + </span> Starts with reference</span> +</p> + diff --git a/tests/segmentation/data/result-14.html b/tests/segmentation/data/result-14.html new file mode 100644 index 0000000..6e4f0fe --- /dev/null +++ b/tests/segmentation/data/result-14.html @@ -0,0 +1,5 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">माउज़र पिस्तौल ( + <a class="cx-link" data-linkid="2" href="#">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध स्वचालित पिस्तौल है। </span> + <span class="cx-segment" data-segmentid="3">इस पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र बन्धुओं ने सन् 1895 में तैयार किया था।</span> +</p> diff --git a/tests/segmentation/data/result-2.html b/tests/segmentation/data/result-2.html new file mode 100644 index 0000000..9c6b813 --- /dev/null +++ b/tests/segmentation/data/result-2.html @@ -0,0 +1,6 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Hello! </span> + <span class="cx-segment" data-segmentid="2">Mr. D. John, How are you?</span> +</p> + + diff --git a/tests/segmentation/data/result-3.html b/tests/segmentation/data/result-3.html new file mode 100644 index 0000000..d0cc742 --- /dev/null +++ b/tests/segmentation/data/result-3.html @@ -0,0 +1,5 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">This is first sentence. </span> + <span class="cx-segment" data-segmentid="2">This is second sentence.</span> +</p> + diff --git a/tests/segmentation/data/result-4.html b/tests/segmentation/data/result-4.html new file mode 100644 index 0000000..ae18988 --- /dev/null +++ b/tests/segmentation/data/result-4.html @@ -0,0 +1,8 @@ +<div id="0"> + <span class="cx-segment" data-segmentid="1">Some div</span> +</div> +<p id="2"> + <span class="cx-segment" data-segmentid="3">This is first sentence. </span> + <span class="cx-segment" data-segmentid="4">This is second sentence</span> +</p> + diff --git a/tests/segmentation/data/result-5.html b/tests/segmentation/data/result-5.html new file mode 100644 index 0000000..72bde12 --- /dev/null +++ b/tests/segmentation/data/result-5.html @@ -0,0 +1,5 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1"> + <a class="cx-link" data-linkid="2" href="#">Hydrogen</a>is a gas</span> +</p> + diff --git a/tests/segmentation/data/result-6.html b/tests/segmentation/data/result-6.html new file mode 100644 index 0000000..c53e527 --- /dev/null +++ b/tests/segmentation/data/result-6.html @@ -0,0 +1,7 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1"> + <a class="cx-link" data-linkid="2" href="#">Hydrogen</a>is a + <a class="cx-link" data-linkid="3" href="#">gas</a> + </span> +</p> + diff --git a/tests/segmentation/data/result-7.html b/tests/segmentation/data/result-7.html new file mode 100644 index 0000000..0fdc527 --- /dev/null +++ b/tests/segmentation/data/result-7.html @@ -0,0 +1,11 @@ +<figure id="0"> + <span class="cx-segment" data-segmentid="1"> + <a class="cx-link" data-linkid="2" href="#"> + <img src="img.png"></img> + </a> + </span> + <figcaption id="3"> + <span class="cx-segment" data-segmentid="4">Figure caption</span> + </figcaption> +</figure> + diff --git a/tests/segmentation/data/result-8.html b/tests/segmentation/data/result-8.html new file mode 100644 index 0000000..4832ee4 --- /dev/null +++ b/tests/segmentation/data/result-8.html @@ -0,0 +1,9 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Sentence one. + <span class="reference"> + <a class="cx-link" data-linkid="2" href="#">reference</a> + </span> + </span> + <span class="cx-segment" data-segmentid="3"> Starts with reference</span> +</p> + diff --git a/tests/segmentation/data/result-9.html b/tests/segmentation/data/result-9.html new file mode 100644 index 0000000..87d2f3b --- /dev/null +++ b/tests/segmentation/data/result-9.html @@ -0,0 +1,15 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Sentence one. + <span class="reference"> + <a class="cx-link" data-linkid="2" href="#">1</a> + </span> + <span class="reference"> + <a class="cx-link" data-linkid="3" href="#">2</a> + </span> + <span class="reference"> + <a class="cx-link" data-linkid="4" href="#">3</a> + </span> + </span> + <span class="cx-segment" data-segmentid="5"> Starts with reference</span> +</p> + diff --git a/tests/segmentation/data/result-debian-1.html b/tests/segmentation/data/result-debian-1.html new file mode 100644 index 0000000..57daee5 --- /dev/null +++ b/tests/segmentation/data/result-debian-1.html @@ -0,0 +1,25 @@ +<p id="0"> + <span class="cx-segment" data-segmentid="1">Debian offers + <i>stable</i>and + <i>testing</i>CD images specifically built for + <a class="cx-link" data-linkid="2" rel="mw:WikiLink" href="./GNOME">GNOME</a>(the default), + <a class="cx-link" data-linkid="3" rel="mw:WikiLink" href="./KDE_Plasma_Workspaces">KDE Plasma Workspaces</a>, + <a class="cx-link" data-linkid="4" rel="mw:WikiLink" href="./Xfce">Xfce</a>and + <a class="cx-link" data-linkid="5" rel="mw:WikiLink" href="./LXDE">LXDE</a>. + <span about="#mwt72" class="reference"> + <a class="cx-link" data-linkid="6" href="#cite_note-25">[25]</a> + </span> + <span about="#mwt72" class="reference"> + <a class="cx-link" data-linkid="7" href="#cite_note-25">[26]</a> + </span> + </span> + <span class="cx-segment" data-segmentid="8"> Less common + <a class="cx-link" data-linkid="9" rel="mw:WikiLink" href="./Window_manager">window managers</a>such as + <a class="cx-link" data-linkid="10" rel="mw:WikiLink" href="./Enlightenment_(window_manager)">Enlightenment</a>, + <a class="cx-link" data-linkid="11" rel="mw:WikiLink" href="./Openbox">Openbox</a>, + <a class="cx-link" data-linkid="12" rel="mw:WikiLink" href="./Fluxbox">Fluxbox</a>, + <a class="cx-link" data-linkid="13" rel="mw:WikiLink" href="./GNUstep">GNUstep</a>, + <a class="cx-link" data-linkid="14" rel="mw:WikiLink" href="./IceWM">IceWM</a>, + <a class="cx-link" data-linkid="15" rel="mw:WikiLink" href="./Window_Maker">Window Maker</a>and others can also be installed.</span> +</p> + diff --git a/tests/segmentation/data/test-1.html b/tests/segmentation/data/test-1.html new file mode 100644 index 0000000..84dcde1 --- /dev/null +++ b/tests/segmentation/data/test-1.html @@ -0,0 +1 @@ +<p>A simple paragraph.</p> diff --git a/tests/segmentation/data/test-10.html b/tests/segmentation/data/test-10.html new file mode 100644 index 0000000..5200c64 --- /dev/null +++ b/tests/segmentation/data/test-10.html @@ -0,0 +1,5 @@ +<p>Sentence one + <span class="reference"> + <a href="#">1</a> + </span>and rest of sentence</p> + diff --git a/tests/segmentation/data/test-11.html b/tests/segmentation/data/test-11.html new file mode 100644 index 0000000..46b062a --- /dev/null +++ b/tests/segmentation/data/test-11.html @@ -0,0 +1,9 @@ +<p> + Hydrogen's atomic number is one.<span class="reference"> + <a href="#">1</a> + </span>Hydrogen is a gas<span class="reference"> + <a href="#">2</a> + </span> + and it is +</p> + diff --git a/tests/segmentation/data/test-12.html b/tests/segmentation/data/test-12.html new file mode 100644 index 0000000..510ee41 --- /dev/null +++ b/tests/segmentation/data/test-12.html @@ -0,0 +1,11 @@ +<p>Sentence one <span class="reference"> + <a href="#">1</a> + </span> + <span class="reference"> + <a href="#">2</a> + </span> + <span class="reference"> + <a href="#">3</a> + </span> and rest of sentence +</p> + diff --git a/tests/segmentation/data/test-13.html b/tests/segmentation/data/test-13.html new file mode 100644 index 0000000..413b752 --- /dev/null +++ b/tests/segmentation/data/test-13.html @@ -0,0 +1 @@ +<p>Sentence one. <span class="reference"><a href="#">reference</a></span> Starts with reference</p> diff --git a/tests/segmentation/data/test-14.html b/tests/segmentation/data/test-14.html new file mode 100644 index 0000000..171fd62 --- /dev/null +++ b/tests/segmentation/data/test-14.html @@ -0,0 +1,3 @@ +<p>माउज़र पिस्तौल ( + <a href="#">अंग्रेजी</a>: Mauser C96) मूल रूप से जर्मनी में बनी एक अर्द्ध स्वचालित पिस्तौल है। इस पिस्तौल का डिजाइन जर्मनी निवासी दो माउज़र बन्धुओं ने सन् 1895 में तैयार किया था।</p> + diff --git a/tests/segmentation/data/test-2.html b/tests/segmentation/data/test-2.html new file mode 100644 index 0000000..b2c8369 --- /dev/null +++ b/tests/segmentation/data/test-2.html @@ -0,0 +1 @@ +<p>Hello! Mr. D. John, How are you?</p> diff --git a/tests/segmentation/data/test-3.html b/tests/segmentation/data/test-3.html new file mode 100644 index 0000000..5fa6ef3 --- /dev/null +++ b/tests/segmentation/data/test-3.html @@ -0,0 +1 @@ +<p>This is first sentence. This is second sentence.</p> diff --git a/tests/segmentation/data/test-4.html b/tests/segmentation/data/test-4.html new file mode 100644 index 0000000..d7db0ef --- /dev/null +++ b/tests/segmentation/data/test-4.html @@ -0,0 +1,2 @@ +<div>Some div</div> +<p>This is first sentence. This is second sentence</p> diff --git a/tests/segmentation/data/test-5.html b/tests/segmentation/data/test-5.html new file mode 100644 index 0000000..870717c --- /dev/null +++ b/tests/segmentation/data/test-5.html @@ -0,0 +1,3 @@ +<p> + <a href="#">Hydrogen</a>is a gas</p> + diff --git a/tests/segmentation/data/test-6.html b/tests/segmentation/data/test-6.html new file mode 100644 index 0000000..95aab00 --- /dev/null +++ b/tests/segmentation/data/test-6.html @@ -0,0 +1,5 @@ +<p> + <a href="#">Hydrogen</a>is a + <a href="#">gas</a> +</p> + diff --git a/tests/segmentation/data/test-7.html b/tests/segmentation/data/test-7.html new file mode 100644 index 0000000..5c78e2d --- /dev/null +++ b/tests/segmentation/data/test-7.html @@ -0,0 +1,9 @@ +<figure> + <a href="#"> + <img src="img.png"> + </a> + <figcaption>Figure caption</figcaption> +</figure> + + + diff --git a/tests/segmentation/data/test-8.html b/tests/segmentation/data/test-8.html new file mode 100644 index 0000000..06f0e69 --- /dev/null +++ b/tests/segmentation/data/test-8.html @@ -0,0 +1,2 @@ +<p>Sentence one.<span class="reference"><a href="#">reference</a></span> Starts with reference</p> + diff --git a/tests/segmentation/data/test-9.html b/tests/segmentation/data/test-9.html new file mode 100644 index 0000000..c4a22d2 --- /dev/null +++ b/tests/segmentation/data/test-9.html @@ -0,0 +1,10 @@ +<p>Sentence one.<span class="reference"> + <a href="#">1</a> + </span> + <span class="reference"> + <a href="#">2</a> + </span> + <span class="reference"> + <a href="#">3</a> + </span> Starts with reference</p> + diff --git a/tests/segmentation/data/test-debian-1.html b/tests/segmentation/data/test-debian-1.html new file mode 100644 index 0000000..ccabacd --- /dev/null +++ b/tests/segmentation/data/test-debian-1.html @@ -0,0 +1,17 @@ +<p> + Debian offers + <i>stable</i>and + <i>testing</i>CD images specifically built for + <a rel="mw:WikiLink" href="./GNOME">GNOME</a>(the default), + <a rel="mw:WikiLink" href="./KDE_Plasma_Workspaces">KDE Plasma Workspaces</a>, + <a rel="mw:WikiLink" href="./Xfce">Xfce</a>and + <a rel="mw:WikiLink" href="./LXDE">LXDE</a>.<span about="#mwt72" class="reference"> + <a href="#cite_note-25">[25]</a></span> + <span about="#mwt72" class="reference"><a href="#cite_note-25">[26]</a></span> Less common + <a rel="mw:WikiLink" href="./Window_manager">window managers</a>such as + <a rel="mw:WikiLink" href="./Enlightenment_(window_manager)">Enlightenment</a>, + <a rel="mw:WikiLink" href="./Openbox">Openbox</a>, + <a rel="mw:WikiLink" href="./Fluxbox">Fluxbox</a>, + <a rel="mw:WikiLink" href="./GNUstep">GNUstep</a>, + <a rel="mw:WikiLink" href="./IceWM">IceWM</a>, + <a rel="mw:WikiLink" href="./Window_Maker">Window Maker</a>and others can also be installed.</p> -- To view, visit https://gerrit.wikimedia.org/r/124298 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I4ee76ef58c5b3ab171269dbbe1a083dbb78382ec Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/cxserver Gerrit-Branch: master Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits