commit: d42179d8d444cc35698e06d2d7520e6f7b383f17 Author: Alex Legler <alex <AT> a3li <DOT> li> AuthorDate: Mon Feb 23 23:54:59 2015 +0000 Commit: Alex Legler <a3li <AT> gentoo <DOT> org> CommitDate: Mon Feb 23 23:55:20 2015 +0000 URL: http://sources.gentoo.org/gitweb/?p=proj/ag.git;a=commit;h=d42179d8
Fall back to charlock_holmes; also add stub encoding extraction from the message for later on --- lib/rendering.rb | 22 ++++++++++++++++------ lib/utils.rb | 4 ++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/rendering.rb b/lib/rendering.rb index 3e77414..7649fcf 100644 --- a/lib/rendering.rb +++ b/lib/rendering.rb @@ -8,33 +8,43 @@ module Ag::Rendering content_type = mime_split(mail.parts.first.content_type) if content_type == 'text/plain' or content_type == 'text/html' - to_content(content_type, mail.parts.first.decoded) + to_content(content_type, mail.parts.first.decoded, get_encoding(mail.parts.first)) else # Nested multipart? if mail.parts.first.multipart? content_type = mime_split(mail.parts.first.parts.first.content_type) if content_type == 'text/plain' or content_type == 'text/html' - to_content(content_type, mail.parts.first.parts.first.decoded) + to_content(content_type, mail.parts.first.parts.first.decoded, get_encoding(mail.parts.first.parts.first)) else raise "Cannot find body: #{mail.message_id}" end # Specialty: Gnus/Emacs signed emails with no explicit multipart type elsif mime_split(mail.content_type) == 'multipart/signed' - to_content('text/plain', mail.parts.first.decoded) + to_content('text/plain', mail.parts.first.decoded, get_encoding(mail.parts.first)) end end else # No Content-Type, assume plain text (git-send-email) if mail.content_type == nil - to_content('text/plain', mail.body.decoded) + to_content('text/plain', mail.body.decoded, get_encoding(mail)) else - to_content(mime_split(mail.content_type), mail.body.decoded) + to_content(mime_split(mail.content_type), mail.body.decoded, get_encoding(mail)) end end end - def self.to_content(content_type, content) + def self.get_encoding(part) + if part.content_type_parameters + part.content_type_parameters['charset'] + else + nil + end + end + + def self.to_content(content_type, content, charset = nil) + #content = content.force_encoding(charset) if charset + if content_type == 'text/plain' escaped_content = CGI::escapeHTML(content) escaped_content.lines.map do |line| diff --git a/lib/utils.rb b/lib/utils.rb index f8d546e..38349e0 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -8,13 +8,13 @@ module Ag module Utils module_function - def fix_encoding(str) + def fix_encoding2(str) s = str.encode('UTF-8', 'UTF-8', invalid: :replace, replace: '') s = s.unpack('C*').pack('U*') unless s.valid_encoding? s end - def fix_encoding_old(str, fail_hard = false) + def fix_encoding(str, fail_hard = false) detection = CharlockHolmes::EncodingDetector.detect(str) CharlockHolmes::Converter.convert(str, detection[:encoding], 'UTF-8') rescue => e