sebbASF commented on a change in pull request #58:
URL:
https://github.com/apache/incubator-ponymail-foal/pull/58#discussion_r711609273
##########
File path: tools/archiver.py
##########
@@ -482,15 +482,31 @@ def default_empty_string(value):
message_date = email.utils.parsedate_tz(
str(msg_metadata.get("archived-at"))
)
-
if not message_date:
- epoch = time.time()
+ print("No message date could be derived from the Date: header,
looking elsewhere.")
+ # See if we have a "From" header line in the raw email, we can use
+ first_line = raw_msg.split(b"\n", 1)[0].decode("us-ascii")
+ if first_line.startswith("From "):
+ # If we have one, the date must be the third element when
splitting by single space.
+ env_from_date = first_line.split(" ", 2)[-1] # Split twice,
grab last element.
+ message_date = email.utils.parsedate_tz(env_from_date)
+ if message_date:
+ print("Found date in envelope FROM header: %s" %
env_from_date)
+ # Otherwise, look for a Received: header we can scan
+ if not message_date:
+ for recv_from in msg.get_all('received', []): # We may have
multiple of these, not all have "from".
+ m = re.match(r"from[^;]+?;\s+(.+?)(?:$|[\r\n])", recv_from)
+ if m:
+ message_date = email.utils.parsedate_tz(m.group(1))
+ if message_date:
+ print("Found date in Received header: %s" %
m.group(1))
+ break
+ if not message_date:
+ print("Could not find any valid dates in email headers, using
current time")
+ epoch = time.time()
Review comment:
For archiving, I agree that falling back to the current time is OK.
For importing, I think it would be better to use a fallback value that does not
vary between imports, so that re-importing the same message yields the same date.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]