Author: jerome Date: Thu Sep 1 15:20:51 2005 New Revision: 265794 URL: http://svn.apache.org/viewcvs?rev=265794&view=rev Log: NUTCH-65, Handles more modification-date formats
Added: lucene/nutch/trunk/lib/commons-lang-2.1.jar (with props) Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Added: lucene/nutch/trunk/lib/commons-lang-2.1.jar URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/commons-lang-2.1.jar?rev=265794&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/commons-lang-2.1.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=265794&r1=265793&r2=265794&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original) +++ lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Thu Sep 1 15:20:51 2005 @@ -53,7 +53,7 @@ import java.util.Enumeration; import java.util.Properties; - +import org.apache.commons.lang.time.DateUtils; /** * Add (or reset) a few metaData properties as respective fields * (if they are available), so that they can be displayed by more.jsp @@ -133,21 +133,37 @@ try { time = HttpDateFormat.toLong(date); } catch (ParseException e) { - // try to parse it as date in alternative format - String date2 = date; - try { - if (date.length() > 25 ) date2 = date.substring(0, 25); - DateFormat df = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss", Locale.US); - time = df.parse(date2).getTime(); - } catch (Exception e1) { - try { - if (date.length() > 24 ) date2 = date.substring(0, 24); - DateFormat df = new SimpleDateFormat("EEE MMM dd 
HH:mm:ss yyyy", Locale.US); - time = df.parse(date2).getTime(); - } catch (Exception e2) { - LOG.warning(url + ": can't parse erroneous date: " + date); - } - } + // try to parse it as date in alternative format + try { + Date parsedDate = DateUtils.parseDate(date, + new String [] { + "EEE MMM dd HH:mm:ss yyyy", + "EEE MMM dd HH:mm:ss yyyy zzz", + "EEE, MMM dd HH:mm:ss yyyy zzz", + "EEE, dd MMM yyyy HH:mm:ss zzz", + "EEE,dd MMM yyyy HH:mm:ss zzz", + "EEE, dd MMM yyyy HH:mm:sszzz", + "EEE, dd MMM yyyy HH:mm:ss", + "EEE, dd-MMM-yy HH:mm:ss zzz", + "yyyy/MM/dd HH:mm:ss.SSS zzz", + "yyyy/MM/dd HH:mm:ss.SSS", + "yyyy/MM/dd HH:mm:ss zzz", + "yyyy/MM/dd", + "yyyy.MM.dd HH:mm:ss", + "yyyy-MM-dd HH:mm", + "MMM dd yyyy HH:mm:ss. zzz", + "MMM dd yyyy HH:mm:ss zzz", + "dd.MM.yyyy HH:mm:ss zzz", + "dd MM yyyy HH:mm:ss zzz", + "dd.MM.yyyy; HH:mm:ss", + "dd.MM.yyyy HH:mm:ss", + "dd.MM.yyyy zzz" + }); + time = parsedDate.getTime(); + // LOG.warning(url + ": parsed date: " + date +" to:"+time); + } catch (Exception e2) { + LOG.warning(url + ": can't parse erroneous date: " + date); + } } return time; }