commit: 2a13f18aa0a7ac3fe7d19eeea45842de818a615c Author: Alex Legler <alex <AT> a3li <DOT> li> AuthorDate: Mon Feb 23 14:21:27 2015 +0000 Commit: Alex Legler <a3li <AT> gentoo <DOT> org> CommitDate: Mon Feb 23 14:21:27 2015 +0000 URL: http://sources.gentoo.org/gitweb/?p=proj/ag.git;a=commit;h=2a13f18a
use more threads! --- Gemfile | 4 +++- Gemfile.lock | 4 ++++ ag | 12 +++++++++--- lib/storage.rb | 10 +++++----- lib/threading.rb | 4 +++- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Gemfile b/Gemfile index 6689dd1..c676b7c 100644 --- a/Gemfile +++ b/Gemfile @@ -4,4 +4,6 @@ gem 'mail' gem 'maildir' gem 'elasticsearch' gem 'sanitize' -gem 'charlock_holmes' \ No newline at end of file +gem 'charlock_holmes' +gem 'parallel' +gem 'ruby-progressbar' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index ca40918..d55552e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -24,6 +24,8 @@ GEM mini_portile (~> 0.6.0) nokogumbo (1.2.0) nokogiri + parallel (1.4.0) + ruby-progressbar (1.7.1) sanitize (3.1.1) crass (~> 1.0.1) nokogiri (>= 1.4.4) @@ -37,4 +39,6 @@ DEPENDENCIES elasticsearch mail maildir + parallel + ruby-progressbar sanitize diff --git a/ag b/ag index dbb1584..ca81b22 100755 --- a/ag +++ b/ag @@ -7,6 +7,8 @@ require 'mail' require 'maildir' require 'elasticsearch' require 'optparse' +require 'parallel' +require 'ruby-progressbar' require_relative 'lib/utils' require_relative 'lib/threading' require_relative 'lib/rendering' @@ -111,7 +113,9 @@ $es.transport.reload_connections! def do_full Ag::Storage.create_index($options.name) - $maildir.list(:cur).each do |maildir_message| + messages = $maildir.list(:cur) + + Parallel.each(messages, progress: "Importing #{$options.name}") do |maildir_message| mail = maildir_message.data begin @@ -126,14 +130,16 @@ def do_full end def do_incremental - $maildir.list(:new).each do |maildir_message| + messages = $maildir.list(:cur) + + Parallel.each(messages, progress: "Importing #{$options.name}") do |maildir_message| mail = maildir_message.data begin Ag::Storage.store($options.name, mail, maildir_message.filename) maildir_message.process unless $options.readonly rescue => e - $stderr.puts "Cannot save message #{mail.message_id} (file #{maildir_message.filename}): #{e.message}" + $stderr.puts "Cannot save message #{mail.message_id} (file #{maildir_message.filename}): #{e.message}" if $options.debug next end end diff --git a/lib/storage.rb b/lib/storage.rb index f255633..d32ba2b 100644 --- a/lib/storage.rb +++ b/lib/storage.rb @@ -7,7 +7,7 @@ module Ag::Storage begin $es.indices.delete index: 'ml-' + list rescue Elasticsearch::Transport::Transport::Errors::NotFound => e - $stderr.puts "Index did not exist yet. Creating." + $stderr.puts "Index did not exist yet. Creating." if $options.debug end $es.indices.create( @@ -84,10 +84,10 @@ module Ag::Storage content = Ag::Utils.fix_encoding(raw_content || '', true).strip if content == '' - $stderr.puts "#{message.message_id}: Content empty?" + $stderr.puts "#{message.message_id}: Content empty?" if $options.debug end rescue => e - $stderr.puts "Cannot render message #{message.message_id} (file: #{filename}): #{e}" + $stderr.puts "Cannot render message #{message.message_id} (file: #{filename}): #{e}" if $options.debug end content @@ -174,7 +174,7 @@ module Ag::Storage ) end - def fix_threading(list) + def fix_threading(list, pass) result = $es.search( index: 'ml-' + list, size: 100000, @@ -201,7 +201,7 @@ module Ag::Storage } ) - result['hits']['hits'].each do |hit| + Parallel.each(result['hits']['hits'], progress: "Calculating Threading (Pass #{pass})") do |hit| msg = resolve_message_id(list, hit['_source']['raw_parent']) unless msg == nil diff --git a/lib/threading.rb b/lib/threading.rb index 8988f23..212bb98 100644 --- a/lib/threading.rb +++ b/lib/threading.rb @@ -57,11 +57,13 @@ module Ag def calc(list) number_of_root_threads = -1 + pass = 1 loop do - new_num = Ag::Storage.fix_threading(list) + new_num = Ag::Storage.fix_threading(list, pass) break if new_num == number_of_root_threads number_of_root_threads = new_num + pass += 1 end end end