[MediaWiki-commits] [Gerrit] mediawiki...Wikispeech[master]: Don't create empty elements
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/345522 ) Change subject: Don't create empty elements .. Don't create empty elements These were created when a `CleanedText` only contained whitespaces. The whitespaces were removed, but a `CleanedText` (with the empty string) was created. Bug: T159669 Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1 --- M includes/Segmenter.php M tests/phpunit/SegmenterTest.php 2 files changed, 34 insertions(+), 19 deletions(-) Approvals: Lokal Profil: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Segmenter.php b/includes/Segmenter.php index ea98d09..9dcb75f 100644 --- a/includes/Segmenter.php +++ b/includes/Segmenter.php @@ -135,25 +135,28 @@ $startOffset, $endOffset - $startOffset + 1 ); - $sentenceText = new CleanedText( - $sentence, - $text->path - ); - array_push( $currentSegment['content'], $sentenceText ); - if ( $currentSegment['startOffset'] === null ) { - // Record the start offset if this is the first text added - // to the segment. - $currentSegment['startOffset'] = $startOffset; - } - $currentSegment['endOffset'] = $endOffset; - if ( $ended ) { - array_push( $segments, $currentSegment ); - // Create a fresh segment to add following text to. - $currentSegment = [ - 'content' => [], - 'startOffset' => null, - 'endOffset' => null - ]; + if ( $sentence !== '' ) { + // Don't add `CleanedText`s with the empty string. + $sentenceText = new CleanedText( + $sentence, + $text->path + ); + array_push( $currentSegment['content'], $sentenceText ); + if ( $currentSegment['startOffset'] === null ) { + // Record the start offset if this is the first text + // added to the segment. + $currentSegment['startOffset'] = $startOffset; + } + $currentSegment['endOffset'] = $endOffset; + if ( $ended ) { + array_push( $segments, $currentSegment ); + // Create a fresh segment to add following text to. + $currentSegment = [ + 'content' => [], + 'startOffset' => null, + 'endOffset' => null + ]; + } } return $endOffset; } diff --git a/tests/phpunit/SegmenterTest.php b/tests/phpunit/SegmenterTest.php index a7fd816..155fa18 100644 --- a/tests/phpunit/SegmenterTest.php +++ b/tests/phpunit/SegmenterTest.php @@ -192,6 +192,18 @@ ); } + public function testRemoveTextWithOnlyWhitespacesOutsideSegments() { + $cleanedContent = [ + new CleanedText( ' ' ), + new CleanedText( 'Sentence 1.' ) + ]; + $segments = Segmenter::segmentSentences( $cleanedContent ); + $this->assertEquals( + 'Sentence 1.', + $segments[0]['content'][0]->string + ); + } + public function testRemoveLeadingAndTrailingWhitespaces() { $cleanedContent = [ new CleanedText( ' Sentence. ' ) ]; $segments = Segmenter::segmentSentences( $cleanedContent ); -- To view, visit https://gerrit.wikimedia.org/r/345522 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikispeech Gerrit-Branch: master Gerrit-Owner: Sebastian Berlin (WMSE) Gerrit-Reviewer: Lokal Profil Gerrit-Reviewer: jenkins-bot <> ___ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
[MediaWiki-commits] [Gerrit] mediawiki...Wikispeech[master]: Don't create empty elements
Sebastian Berlin (WMSE) has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/345522 ) Change subject: Don't create empty elements .. Don't create empty elements These were created when a `CleanedText` only contained whitespaces. The whitespaces were removed, but a `CleanedText` (with the empty string) was created. Bug: T159669 Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1 --- M includes/Segmenter.php M tests/phpunit/SegmenterTest.php 2 files changed, 34 insertions(+), 19 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikispeech refs/changes/22/345522/1 diff --git a/includes/Segmenter.php b/includes/Segmenter.php index ea98d09..9dcb75f 100644 --- a/includes/Segmenter.php +++ b/includes/Segmenter.php @@ -135,25 +135,28 @@ $startOffset, $endOffset - $startOffset + 1 ); - $sentenceText = new CleanedText( - $sentence, - $text->path - ); - array_push( $currentSegment['content'], $sentenceText ); - if ( $currentSegment['startOffset'] === null ) { - // Record the start offset if this is the first text added - // to the segment. - $currentSegment['startOffset'] = $startOffset; - } - $currentSegment['endOffset'] = $endOffset; - if ( $ended ) { - array_push( $segments, $currentSegment ); - // Create a fresh segment to add following text to. - $currentSegment = [ - 'content' => [], - 'startOffset' => null, - 'endOffset' => null - ]; + if ( $sentence !== '' ) { + // Don't add `CleanedText`s with the empty string. + $sentenceText = new CleanedText( + $sentence, + $text->path + ); + array_push( $currentSegment['content'], $sentenceText ); + if ( $currentSegment['startOffset'] === null ) { + // Record the start offset if this is the first text + // added to the segment. + $currentSegment['startOffset'] = $startOffset; + } + $currentSegment['endOffset'] = $endOffset; + if ( $ended ) { + array_push( $segments, $currentSegment ); + // Create a fresh segment to add following text to. + $currentSegment = [ + 'content' => [], + 'startOffset' => null, + 'endOffset' => null + ]; + } } return $endOffset; } diff --git a/tests/phpunit/SegmenterTest.php b/tests/phpunit/SegmenterTest.php index a7fd816..155fa18 100644 --- a/tests/phpunit/SegmenterTest.php +++ b/tests/phpunit/SegmenterTest.php @@ -192,6 +192,18 @@ ); } + public function testRemoveTextWithOnlyWhitespacesOutsideSegments() { + $cleanedContent = [ + new CleanedText( ' ' ), + new CleanedText( 'Sentence 1.' ) + ]; + $segments = Segmenter::segmentSentences( $cleanedContent ); + $this->assertEquals( + 'Sentence 1.', + $segments[0]['content'][0]->string + ); + } + public function testRemoveLeadingAndTrailingWhitespaces() { $cleanedContent = [ new CleanedText( ' Sentence. ' ) ]; $segments = Segmenter::segmentSentences( $cleanedContent ); -- To view, visit https://gerrit.wikimedia.org/r/345522 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2bb150b2e69357cd53e587344f41183da0b27bc1 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikispeech Gerrit-Branch: master Gerrit-Owner: Sebastian Berlin (WMSE) ___ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits