jenkins-bot has submitted this change and it was merged.

Change subject: Support 5+ digit years in DateTimeParser
......................................................................


Support 5+ digit years in DateTimeParser

Bug: 62648
Change-Id: I03b799542a8d6f334116d6a56d34ff9bbec88f40
---
M lib/includes/parsers/DateTimeParser.php
M lib/tests/phpunit/parsers/DateTimeParserTest.php
2 files changed, 43 insertions(+), 5 deletions(-)

Approvals:
  WikidataJenkins: Verified
  Thiemo Mättig (WMDE): Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/includes/parsers/DateTimeParser.php b/lib/includes/parsers/DateTimeParser.php
index 85b26bc..8c408fa 100644
--- a/lib/includes/parsers/DateTimeParser.php
+++ b/lib/includes/parsers/DateTimeParser.php
@@ -34,9 +34,10 @@
        }
 
        /**
-        * Parses the provided string and returns the result.
+        * Parses the provided string
         *
-        * @param string $value
+        * @param string $value in a format as specified by the PHP DateTime object
+        *       there are exceptions as we can handle 5+ digit dates
         *
         * @throws ParseException
         * @return TimeValue
@@ -44,6 +45,10 @@
        protected function stringParse( $value ) {
                $calendarModelParser = new CalendarModelParser();
                $options = $this->getOptions();
+
+               //Place to put large years when they are found
+               $largeYear = null;
+
                try{
                        $value = $this->getValueWithFixedYearLengths(
                                $this->getValueWithFixedSeparators(
@@ -55,10 +60,21 @@
                                )
                        );
 
+                       //PHP's DateTime object also can't handle larger than 4 digit years
+                       //e.g. 1 June 202020
+                       if( preg_match( '/^(.*[^\d]|)(\d{5,})(.*|)$/', $value, $matches ) ) {
+                               $value = $matches[1] . substr( $matches[2], -4 ) . $matches[3];
+                               $largeYear = $matches[2];
+                       }
+
                        //Parse using the DateTime object (this will allow us to format the date in a nicer way)
                        //TODO try to match and remove BCE etc. before putting the value into the DateTime object to get - dates!
                        $dateTime = new DateTime( $value );
-                       $timeString = '+' . $dateTime->format( 'Y-m-d\TH:i:s\Z' 
);
+                       if( $largeYear === null ) {
+                               $timeString = '+' . $dateTime->format( 
'Y-m-d\TH:i:s\Z' );
+                       } else {
+                               $timeString = '+' . $largeYear . 
$dateTime->format( '-m-d\TH:i:s\Z' );
+                       }
 
                        //Pass the reformatted string into a base parser that 
parses this +/-Y-m-d\TH:i:s\Z format with a precision
                        $valueParser = new \ValueParsers\TimeParser( 
$calendarModelParser, $options );
@@ -114,4 +130,4 @@
                return $value;
        }
 
-}
\ No newline at end of file
+}
diff --git a/lib/tests/phpunit/parsers/DateTimeParserTest.php b/lib/tests/phpunit/parsers/DateTimeParserTest.php
index de40bec..c9e5912 100644
--- a/lib/tests/phpunit/parsers/DateTimeParserTest.php
+++ b/lib/tests/phpunit/parsers/DateTimeParserTest.php
@@ -84,6 +84,10 @@
                                array( '+0000000000000055-01-09T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
                        '555-01-09' =>
                                array( '+0000000000000555-01-09T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '33300-1-1' =>
+                               array( '+0000000000033300-01-01T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '3330002-1-1' =>
+                               array( '+0000000003330002-01-01T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
 
                        //Less than 4 digit years
                        '10/10/10' =>
@@ -104,6 +108,25 @@
                                array( '+0000000000000111-07-04T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
                        '4th July 1' =>
                                array( '+0000000000000001-07-04T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+
+                       //More than 4 digit years
+                       '4th July 10000' =>
+                               array( '+0000000000010000-07-04T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '10/10/22000' =>
+                               array( '+0000000000022000-10-10T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '1-1-33300' =>
+                               array( '+0000000000033300-01-01T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '4th July 7214614279199781' =>
+                               array( '+7214614279199781-07-04T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+
+                       //Testing Leap Year stuff
+                       '10000-02-29' =>
+                               array( '+0000000000010000-02-29T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '10100-02-29' =>
+                               array( '+0000000000010100-03-01T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+                       '10400-02-29' =>
+                               array( '+0000000000010400-02-29T00:00:00Z', 0 , 
0 , 0 , TimeValue::PRECISION_DAY , TimeFormatter::CALENDAR_GREGORIAN ),
+
 
                );
 
@@ -126,7 +149,6 @@
                        'Jann 2014',
 
                        // Not within the scope of this parser
-                       '1 July 20000', // The DateTime object cant parse years 
with more than 4 digits
                        '100BC', // The DateTime object cant parse BC years
                );
 

-- 
To view, visit https://gerrit.wikimedia.org/r/119040
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I03b799542a8d6f334116d6a56d34ff9bbec88f40
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: Adrian Lang <adrian.l...@wikimedia.de>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Jeroen De Dauw <jeroended...@gmail.com>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de>
Gerrit-Reviewer: Tobias Gritschacher <tobias.gritschac...@wikimedia.de>
Gerrit-Reviewer: WikidataJenkins <wikidata-servi...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to