[MediaWiki-commits] [Gerrit] mediawiki...Wikispeech[master]: Don't create empty <text> elements

2017-03-30 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/345522 )

Change subject: Don't create empty <text> elements
..


Don't create empty <text> elements

These were created when a `CleanedText` only contained
whitespaces. The whitespaces were removed, but a `CleanedText` (with
the empty string) was created.

Bug: T159669
Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1
---
M includes/Segmenter.php
M tests/phpunit/SegmenterTest.php
2 files changed, 34 insertions(+), 19 deletions(-)

Approvals:
  Lokal Profil: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/Segmenter.php b/includes/Segmenter.php
index ea98d09..9dcb75f 100644
--- a/includes/Segmenter.php
+++ b/includes/Segmenter.php
@@ -135,25 +135,28 @@
$startOffset,
$endOffset - $startOffset + 1
);
-   $sentenceText = new CleanedText(
-   $sentence,
-   $text->path
-   );
-   array_push( $currentSegment['content'], $sentenceText );
-   if ( $currentSegment['startOffset'] === null ) {
-   // Record the start offset if this is the first text added
-   // to the segment.
-   $currentSegment['startOffset'] = $startOffset;
-   }
-   $currentSegment['endOffset'] = $endOffset;
-   if ( $ended ) {
-   array_push( $segments, $currentSegment );
-   // Create a fresh segment to add following text to.
-   $currentSegment = [
-   'content' => [],
-   'startOffset' => null,
-   'endOffset' => null
-   ];
+   if ( $sentence !== '' ) {
+   // Don't add `CleanedText`s with the empty string.
+   $sentenceText = new CleanedText(
+   $sentence,
+   $text->path
+   );
+   array_push( $currentSegment['content'], $sentenceText );
+   if ( $currentSegment['startOffset'] === null ) {
+   // Record the start offset if this is the first text
+   // added to the segment.
+   $currentSegment['startOffset'] = $startOffset;
+   }
+   $currentSegment['endOffset'] = $endOffset;
+   if ( $ended ) {
+   array_push( $segments, $currentSegment );
+   // Create a fresh segment to add following text to.
+   $currentSegment = [
+   'content' => [],
+   'startOffset' => null,
+   'endOffset' => null
+   ];
+   }
}
return $endOffset;
}
diff --git a/tests/phpunit/SegmenterTest.php b/tests/phpunit/SegmenterTest.php
index a7fd816..155fa18 100644
--- a/tests/phpunit/SegmenterTest.php
+++ b/tests/phpunit/SegmenterTest.php
@@ -192,6 +192,18 @@
);
}
 
+   public function testRemoveTextWithOnlyWhitespacesOutsideSegments() {
+   $cleanedContent = [
+   new CleanedText( ' ' ),
+   new CleanedText( 'Sentence 1.' )
+   ];
+   $segments = Segmenter::segmentSentences( $cleanedContent );
+   $this->assertEquals(
+   'Sentence 1.',
+   $segments[0]['content'][0]->string
+   );
+   }
+
public function testRemoveLeadingAndTrailingWhitespaces() {
$cleanedContent = [ new CleanedText( ' Sentence. ' ) ];
$segments = Segmenter::segmentSentences( $cleanedContent );

-- 
To view, visit https://gerrit.wikimedia.org/r/345522
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikispeech
Gerrit-Branch: master
Gerrit-Owner: Sebastian Berlin (WMSE) 
Gerrit-Reviewer: Lokal Profil 
Gerrit-Reviewer: jenkins-bot <>

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] mediawiki...Wikispeech[master]: Don't create empty <text> elements

2017-03-30 Thread Sebastian Berlin (WMSE) (Code Review)
Sebastian Berlin (WMSE) has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/345522 )

Change subject: Don't create empty <text> elements
..

Don't create empty <text> elements

These were created when a `CleanedText` only contained
whitespaces. The whitespaces were removed, but a `CleanedText` (with
the empty string) was created.

Bug: T159669
Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1
---
M includes/Segmenter.php
M tests/phpunit/SegmenterTest.php
2 files changed, 34 insertions(+), 19 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikispeech refs/changes/22/345522/1

diff --git a/includes/Segmenter.php b/includes/Segmenter.php
index ea98d09..9dcb75f 100644
--- a/includes/Segmenter.php
+++ b/includes/Segmenter.php
@@ -135,25 +135,28 @@
$startOffset,
$endOffset - $startOffset + 1
);
-   $sentenceText = new CleanedText(
-   $sentence,
-   $text->path
-   );
-   array_push( $currentSegment['content'], $sentenceText );
-   if ( $currentSegment['startOffset'] === null ) {
-   // Record the start offset if this is the first text added
-   // to the segment.
-   $currentSegment['startOffset'] = $startOffset;
-   }
-   $currentSegment['endOffset'] = $endOffset;
-   if ( $ended ) {
-   array_push( $segments, $currentSegment );
-   // Create a fresh segment to add following text to.
-   $currentSegment = [
-   'content' => [],
-   'startOffset' => null,
-   'endOffset' => null
-   ];
+   if ( $sentence !== '' ) {
+   // Don't add `CleanedText`s with the empty string.
+   $sentenceText = new CleanedText(
+   $sentence,
+   $text->path
+   );
+   array_push( $currentSegment['content'], $sentenceText );
+   if ( $currentSegment['startOffset'] === null ) {
+   // Record the start offset if this is the first text
+   // added to the segment.
+   $currentSegment['startOffset'] = $startOffset;
+   }
+   $currentSegment['endOffset'] = $endOffset;
+   if ( $ended ) {
+   array_push( $segments, $currentSegment );
+   // Create a fresh segment to add following text to.
+   $currentSegment = [
+   'content' => [],
+   'startOffset' => null,
+   'endOffset' => null
+   ];
+   }
}
return $endOffset;
}
diff --git a/tests/phpunit/SegmenterTest.php b/tests/phpunit/SegmenterTest.php
index a7fd816..155fa18 100644
--- a/tests/phpunit/SegmenterTest.php
+++ b/tests/phpunit/SegmenterTest.php
@@ -192,6 +192,18 @@
);
}
 
+   public function testRemoveTextWithOnlyWhitespacesOutsideSegments() {
+   $cleanedContent = [
+   new CleanedText( ' ' ),
+   new CleanedText( 'Sentence 1.' )
+   ];
+   $segments = Segmenter::segmentSentences( $cleanedContent );
+   $this->assertEquals(
+   'Sentence 1.',
+   $segments[0]['content'][0]->string
+   );
+   }
+
public function testRemoveLeadingAndTrailingWhitespaces() {
$cleanedContent = [ new CleanedText( ' Sentence. ' ) ];
$segments = Segmenter::segmentSentences( $cleanedContent );

-- 
To view, visit https://gerrit.wikimedia.org/r/345522
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikispeech
Gerrit-Branch: master
Gerrit-Owner: Sebastian Berlin (WMSE) 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits