commit:     d38a68108d6224c7d5e8fd97fcdfe26ef0e0cf8c
Author:     Alex Legler <alex <AT> a3li <DOT> li>
AuthorDate: Mon Feb 23 20:57:35 2015 +0000
Commit:     Alex Legler <a3li <AT> gentoo <DOT> org>
CommitDate: Mon Feb 23 20:57:35 2015 +0000
URL:        http://sources.gentoo.org/gitweb/?p=proj/ag.git;a=commit;h=d38a6810

More robust header parsing (again hopefully)

---
 lib/storage.rb | 54 +++++++++++++++++++++++++++++++++++++++---------------
 lib/utils.rb   | 29 +++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/lib/storage.rb b/lib/storage.rb
index d32ba2b..6409df2 100644
--- a/lib/storage.rb
+++ b/lib/storage.rb
@@ -28,6 +28,18 @@ module Ag::Storage
                   }
                 }
               },
+              received: {
+                properties: {
+                  hop: {
+                    type: 'string',
+                    index: 'not_analyzed'
+                  },
+                  date: {
+                    type: 'date',
+                    format: 'dateOptionalTime'
+                  }
+                }
+              },
               cc: {
                 type: 'string'
               },
@@ -81,7 +93,7 @@ module Ag::Storage
     content = 'Cannot parse MIME/contents.'
     begin
       raw_content = Ag::Rendering::HTMLizer.HTMLize(message)
-      content = Ag::Utils.fix_encoding(raw_content || '', true).strip
+      content = Ag::Utils.fix_encoding(raw_content || '').strip
 
       if content == ''
         $stderr.puts "#{message.message_id}: Content empty?" if $options.debug
@@ -125,24 +137,35 @@ module Ag::Storage
   def store(list, message, filename)
     content = get_content(message, filename)
 
-    identifier = message['X-Archives-Hash'].value
+    identifier = nil
+    begin
+      identifier = message['X-Archives-Hash'].value
+    rescue NoMethodError
+      raise 'No archives hash'
+    end
+
     raw_parent = Ag::Threading.get_parent_message_id(message)
 
-    from = Ag::Utils.fix_encoding(message[:from].formatted.first)
-    from_realname = from.gsub(/<(.*)>/, '').strip
+    from = Ag::Utils.resolve_address_header(message, :from).first
+    from_realname = Ag::Utils.get_sender_displayname(message)
+    to = Ag::Utils.resolve_address_header(message, :to)
+    cc = Ag::Utils.resolve_address_header(message, :cc)
+    subject = Ag::Utils.fix_encoding(message.subject)
 
-    to = ''
-    if message[:to]
-      to = Ag::Utils.fix_encoding(message[:to].formatted.join(','))
-    end
+    date = [message.received].flatten.first.field.date_time
 
-    cc = ''
-    if message[:cc]
-      cc = Ag::Utils.fix_encoding(message[:cc].formatted.join(','))
+    received = []
+    [message.received].flatten.each do |hop|
+      begin
+        received << {
+          hop: hop.field.info,
+          date: hop.field.date_time
+        }
+      rescue => e
+        next
+      end
     end
 
-    subject = Ag::Utils.fix_encoding(message.subject)
-
     attachments = []
     if message.has_attachments?
       message.attachments.each do |attachment|
@@ -164,10 +187,11 @@ module Ag::Storage
         cc: cc,
         from: from,
         from_realname: from_realname,
-        date: message.date,
-        month: ("%i%02i" % [message.date.year, message.date.month]).to_i, # this is a sortable number!
+        date: date,
+        month: ("%i%02i" % [date.year, date.month]).to_i, # this is a sortable number!
         content: content,
         attachments: attachments,
+        received: received,
         raw_parent: raw_parent,
         raw_filename: filename
       }

diff --git a/lib/utils.rb b/lib/utils.rb
index fc4427e..d621a2e 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -33,5 +33,34 @@ module Ag
 
       id
     end
+
+    def resolve_address_header(message, header)
+      if message[header].is_a? Mail::StructuredField
+        # Good header, properly parsed
+        message[header].addrs.map {|s| fix_encoding(s.to_s)}
+      elsif nil == message[header]
+        # Header not set, return empty
+        []
+      else
+        # Parsing failed, do best-effort parsing
+        [message[header].to_s.split(/,\s*/)].flatten.map {|s| fix_encoding(s)}
+      end
+    rescue ArgumentError
+      []
+    end
+
+    def get_sender_displayname(message)
+      if message[:from].is_a? Mail::StructuredField
+        fix_encoding(message[:from].addrs.first.to_s).strip
+      else
+        if message[:from].respond_to? :addrs and display_name = message[:from].addrs.first.display_name
+          fix_encoding(display_name).strip
+        else
+          fix_encoding(message[:from].to_s).strip
+        end
+      end
+    rescue ArgumentError
+      ''
+    end
   end
 end
\ No newline at end of file

Reply via email to