commit: d38a68108d6224c7d5e8fd97fcdfe26ef0e0cf8c Author: Alex Legler <alex <AT> a3li <DOT> li> AuthorDate: Mon Feb 23 20:57:35 2015 +0000 Commit: Alex Legler <a3li <AT> gentoo <DOT> org> CommitDate: Mon Feb 23 20:57:35 2015 +0000 URL: http://sources.gentoo.org/gitweb/?p=proj/ag.git;a=commit;h=d38a6810
More robust header parsing (again hopefully) --- lib/storage.rb | 54 +++++++++++++++++++++++++++++++++++++++--------------- lib/utils.rb | 29 +++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/lib/storage.rb b/lib/storage.rb index d32ba2b..6409df2 100644 --- a/lib/storage.rb +++ b/lib/storage.rb @@ -28,6 +28,18 @@ module Ag::Storage } } }, + received: { + properties: { + hop: { + type: 'string', + index: 'not_analyzed' + }, + date: { + type: 'date', + format: 'dateOptionalTime' + } + } + }, cc: { type: 'string' }, @@ -81,7 +93,7 @@ module Ag::Storage content = 'Cannot parse MIME/contents.' begin raw_content = Ag::Rendering::HTMLizer.HTMLize(message) - content = Ag::Utils.fix_encoding(raw_content || '', true).strip + content = Ag::Utils.fix_encoding(raw_content || '').strip if content == '' $stderr.puts "#{message.message_id}: Content empty?" if $options.debug @@ -125,24 +137,35 @@ module Ag::Storage def store(list, message, filename) content = get_content(message, filename) - identifier = message['X-Archives-Hash'].value + identifier = nil + begin + identifier = message['X-Archives-Hash'].value + rescue NoMethodError + raise 'No archives hash' + end + raw_parent = Ag::Threading.get_parent_message_id(message) - from = Ag::Utils.fix_encoding(message[:from].formatted.first) - from_realname = from.gsub(/<(.*)>/, '').strip + from = Ag::Utils.resolve_address_header(message, :from).first + from_realname = Ag::Utils.get_sender_displayname(message) + to = Ag::Utils.resolve_address_header(message, :to) + cc = Ag::Utils.resolve_address_header(message, :cc) + subject = Ag::Utils.fix_encoding(message.subject) - to = '' - if message[:to] - to = Ag::Utils.fix_encoding(message[:to].formatted.join(',')) - end + date = [message.received].flatten.first.field.date_time - cc = '' - if message[:cc] - cc = Ag::Utils.fix_encoding(message[:cc].formatted.join(',')) + received = [] + [message.received].flatten.each do |hop| + begin + received << { + hop: hop.field.info, + date: hop.field.date_time + } + rescue => e + next + end end - subject = Ag::Utils.fix_encoding(message.subject) - attachments = [] if message.has_attachments? message.attachments.each do |attachment| @@ -164,10 +187,11 @@ module Ag::Storage cc: cc, from: from, from_realname: from_realname, - date: message.date, - month: ("%i%02i" % [message.date.year, message.date.month]).to_i, # this is a sortable number! + date: date, + month: ("%i%02i" % [date.year, date.month]).to_i, # this is a sortable number! content: content, attachments: attachments, + received: received, raw_parent: raw_parent, raw_filename: filename } diff --git a/lib/utils.rb b/lib/utils.rb index fc4427e..d621a2e 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -33,5 +33,34 @@ module Ag id end + + def resolve_address_header(message, header) + if message[header].is_a? Mail::StructuredField + # Good header, properly parsed + message[header].addrs.map {|s| fix_encoding(s.to_s)} + elsif nil == message[header] + # Header not set, return empty + [] + else + # Parsing failed, do best-effort parsing + [message[header].to_s.split(/,\s*/)].flatten.map {|s| fix_encoding(s)} + end + rescue ArgumentError + [] + end + + def get_sender_displayname(message) + if message[:from].is_a? Mail::StructuredField + fix_encoding(message[:from].addrs.first.to_s).strip + else + if message[:from].respond_to? :addrs and display_name = message[:from].addrs.first.display_name + fix_encoding(display_name).strip + else + fix_encoding(message[:from].to_s).strip + end + end + rescue ArgumentError + '' + end end end \ No newline at end of file