Author: tilman
Date: Fri Dec 12 11:00:59 2025
New Revision: 1930456
Log:
PDFBOX-6119: support dates without timezone + use jdk8, by Andrea Vacondio
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/DateConverterTest.java
Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java Fri Dec 12 08:30:14 2025 (r1930455)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/DateConverter.java Fri Dec 12 11:00:59 2025 (r1930456)
@@ -24,8 +24,11 @@ package org.apache.xmpbox;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.util.Calendar;
import java.util.Date;
@@ -33,7 +36,6 @@ import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.SimpleTimeZone;
import java.util.TimeZone;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -52,6 +54,10 @@ import java.util.regex.Pattern;
public final class DateConverter
{
+ public static final DateTimeFormatter DATE_TIME_FORMATTER = new DateTimeFormatterBuilder().parseCaseInsensitive()
+ .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME).parseLenient().appendOffset("+HH:MM", "Z").parseStrict()
+ .toFormatter();
+
// The Date format is supposed to be the PDF_DATE_FORMAT, but not all PDF
// documents
// will use that date, so I have added a couple other potential formats
@@ -100,19 +106,10 @@ public final class DateConverter
try
{
SimpleTimeZone zone = null;
-
+
if (Pattern.matches("^\\d{4}-\\d{2}-\\d{2}T.*", date))
{
- // Assuming ISO860 date string
- try
- {
- return fromISO8601(date, "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX][zzz]");
- }
- catch (DateTimeParseException e)
- {
- // PDFBOX-6062: support nanoseconds
- return fromISO8601(date, "yyyy-MM-dd'T'HH:mm:ss[.SSSSSS][XXX][zzz]");
- }
+ return fromISO8601(date);
}
if (date.startsWith("D:"))
{
@@ -339,77 +336,18 @@ public final class DateConverter
retval.append(minutes);
return retval.toString();
}
-
- /**
- * Get a Calendar from an ISO8601 date string.
- *
- * @param dateString
- * @return the Calendar instance.
- */
- private static Calendar fromISO8601(String dateString, String pattern)
- {
- DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(pattern);
-
- // Pattern to test for a time zone string
- Pattern timeZonePattern = Pattern.compile(
- "[\\d-]*T?[\\d-\\.]([A-Z]{1,4})$|(.*\\d*)([A-Z][a-z]+\\/[A-Z][a-z]+)$"
- );
- Matcher timeZoneMatcher = timeZonePattern.matcher(dateString);
-
- String timeZoneString = null;
-
- while (timeZoneMatcher.find())
- {
- for (int i = 1; i <= timeZoneMatcher.groupCount(); i++)
- {
- String group = timeZoneMatcher.group(i);
- if (group != null)
- {
- timeZoneString = group;
- }
- }
- }
- if (timeZoneString != null)
+ private static Calendar fromISO8601(String dateString)
+ {
+ try
{
- // can't use parseDateTime immediately, first do handling for time that has no seconds
- int teeIndex = dateString.indexOf('T');
- int tzIndex = dateString.indexOf(timeZoneString);
- String toParse = dateString.substring(0, tzIndex);
- if (tzIndex - teeIndex == 6)
- {
- toParse = dateString.substring(0, tzIndex) + ":00";
- }
-
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(toParse + timeZoneString, dateTimeFormatter);
-
+ ZonedDateTime zonedDateTime = ZonedDateTime.parse(dateString, DATE_TIME_FORMATTER);
return GregorianCalendar.from(zonedDateTime);
}
- else
+ catch (DateTimeParseException e)
{
- // can't use parseDateTime immediately, first do handling for time that has no seconds
- int teeIndex = dateString.indexOf('T');
- if (teeIndex == -1)
- {
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(dateString, dateTimeFormatter);
- return GregorianCalendar.from(zonedDateTime);
- }
- int plusIndex = dateString.indexOf('+', teeIndex + 1);
- int minusIndex = dateString.indexOf('-', teeIndex + 1);
- if (plusIndex == -1 && minusIndex == -1)
- {
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(dateString, dateTimeFormatter);
- return GregorianCalendar.from(zonedDateTime);
- }
- plusIndex = Math.max(plusIndex, minusIndex);
- if (plusIndex - teeIndex == 6)
- {
- String toParse = dateString.substring(0, plusIndex) + ":00" + dateString.substring(plusIndex);
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(toParse, dateTimeFormatter);
- return GregorianCalendar.from(zonedDateTime);
- }
- ZonedDateTime zonedDateTime = ZonedDateTime.parse(dateString, dateTimeFormatter);
- return GregorianCalendar.from(zonedDateTime);
+ LocalDateTime localDateTime = LocalDateTime.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE_TIME);
+ return GregorianCalendar.from(localDateTime.atZone(ZoneId.of("UTC")));
}
}
}
Modified: pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/DateConverterTest.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/DateConverterTest.java Fri Dec 12 08:30:14 2025 (r1930455)
+++ pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/DateConverterTest.java Fri Dec 12 11:00:59 2025 (r1930456)
@@ -21,12 +21,16 @@
package org.apache.xmpbox;
+import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE_TIME;
+
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Calendar;
@@ -53,8 +57,14 @@ class DateConverterTest
void testDateConversion() throws IOException
{
// Test partial dates
- Calendar convDate = DateConverter.toCalendar("2015-02-02");
+ Calendar convDate = DateConverter.toCalendar("2015");
+ assertEquals(2015, convDate.get(Calendar.YEAR));
+ convDate = DateConverter.toCalendar("2015-05");
+ assertEquals(4, convDate.get(Calendar.MONTH));
+ convDate = DateConverter.toCalendar("2015-05-02");
assertEquals(2015, convDate.get(Calendar.YEAR));
+ assertEquals(4, convDate.get(Calendar.MONTH));
+ assertEquals(2, convDate.get(Calendar.DAY_OF_MONTH));
convDate = DateConverter.toCalendar("D:2015-02-02");
assertEquals(2015, convDate.get(Calendar.YEAR));
@@ -133,16 +143,6 @@ class DateConverterTest
assertEquals(DateConverter.toCalendar(testString2).toInstant(),ZonedDateTime.parse(testString1, dateTimeFormatter).toInstant());
- testString1 = "2015-02-02T16:37:19.192Z";
- testString2 = "2015-02-02T08:37:19.192PST";
-
- assertEquals(DateConverter.toCalendar(testString2).toInstant(),ZonedDateTime.parse(testString1, dateTimeFormatter).toInstant());
-
- testString1 = "2015-02-02T16:37:19.192+01:00";
- testString2 = "2015-02-02T16:37:19.192Europe/Berlin";
-
- assertEquals(DateConverter.toCalendar(testString2).toInstant(),ZonedDateTime.parse(testString1, dateTimeFormatter).toInstant());
-
// PDFBOX-4902: half-hour TZ
testString1 = "2015-02-02T16:37:19.192+05:30";
assertEquals(DateConverter.toCalendar(testString1).toInstant(),ZonedDateTime.parse(testString1, dateTimeFormatter).toInstant());
@@ -152,6 +152,11 @@ class DateConverterTest
testString1 = "2015-02-02T16:37:19.192+10:30";
assertEquals(DateConverter.toCalendar(testString1).toInstant(),ZonedDateTime.parse(testString1, dateTimeFormatter).toInstant());
+
+ testString1 = "2024-04-09T14:41:38";
+ assertEquals(DateConverter.toCalendar(testString1).toInstant(),
+ LocalDateTime.parse(testString1, ISO_LOCAL_DATE_TIME).atZone(ZoneId.of("UTC"))
+ .toInstant());
}
/**