From: Pieter van de Bruggen <[email protected]>

When we import a report, pushing the entire content through the database using
DelayedJob is fairly inefficient - we were shipping megabytes of YAML around,
with some inefficient encoding, to get the data imported.

Writing the report to a file, then importing by filename, increases
performance substantially, at the cost of tying us to workers on the same
node.  Right now, that seems a worthwhile trade-off.

Reviewed-By: Matt Robinson <[email protected]>

Based on commit 1d00feb7a66c3f8001eec87d22c1cef823c5b945 by
  Pieter van de Bruggen <[email protected]>
---
 app/controllers/reports_controller.rb       |   18 ++++++++++++++++--
 app/models/report.rb                        |   10 ++++++----
 config/initializers/delayed_job.rb          |   11 +++++++----
 lib/tasks/import_reports.rake               |    7 +++----
 spec/controllers/reports_controller_spec.rb |   27 +++++++++++++++++----------
 spool/.gitignore                            |    3 +++
 6 files changed, 52 insertions(+), 24 deletions(-)
 create mode 100644 spool/.gitignore

diff --git a/app/controllers/reports_controller.rb 
b/app/controllers/reports_controller.rb
index f8752d3..5952c73 100644
--- a/app/controllers/reports_controller.rb
+++ b/app/controllers/reports_controller.rb
@@ -27,8 +27,22 @@ class ReportsController < InheritedResources::Base
 
   def upload
     begin
-      Report.create_from_yaml(params[:report][:report])
-      render :text => "Report successfully uploaded"
+      yaml = params[:report][:report]
+      md5  = Digest::MD5.hexdigest(yaml)
+      file = Rails.root + 'spool' + "#{md5}.yaml"
+      n    = 0
+
+      begin
+        fd = File.new(file, File::CREAT|File::EXCL|File::RDWR, 0600)
+        fd.print yaml
+        fd.close
+
+        Report.delay.create_from_yaml_file(file.to_s, :delete => true)
+        render :text => "Report queued for import as #{md5}"
+      rescue Errno::EEXIST
+        file = Rails.root + 'spool' + "#{md5}-#{n += 1}.yaml"
+        retry
+      end
     rescue => e
       error_text = "ERROR! ReportsController#upload failed:"
       Rails.logger.debug error_text
diff --git a/app/models/report.rb b/app/models/report.rb
index 3fc20e3..5012b04 100644
--- a/app/models/report.rb
+++ b/app/models/report.rb
@@ -79,6 +79,12 @@ class Report < ActiveRecord::Base
     attribute_hash
   end
 
+  def self.create_from_yaml_file(report_file, options = {})
+    report = create_from_yaml(File.read(report_file))
+    File.unlink(report_file) if options[:delete]
+    return report
+  end
+
   def self.create_from_yaml(report_yaml)
     raw_report = YAML.load(report_yaml)
 
@@ -96,10 +102,6 @@ class Report < ActiveRecord::Base
     report
   end
 
-  class << self
-    handle_asynchronously :create_from_yaml
-  end
-
   def assign_to_node
     self.node = Node.find_or_create_by_name(self.host)
   end
diff --git a/config/initializers/delayed_job.rb 
b/config/initializers/delayed_job.rb
index 29183bd..d8f54e2 100644
--- a/config/initializers/delayed_job.rb
+++ b/config/initializers/delayed_job.rb
@@ -1,11 +1,12 @@
-DELAYED_JOB_PID_PATH = "#{Rails.root}/tmp/pids/delayed_job.pid"
+DELAYED_JOB_PID_PATH = Pathname.new 
"#{Rails.root}/tmp/pids/delayed_job_#{Rails.env}"
+DELAYED_JOB_PID_PATH.mkpath
 
 Delayed::Worker.destroy_failed_jobs = false
 Delayed::Worker.max_attempts = 3
 
 def start_delayed_job
   Thread.new do
-    `#{Rails.root}/script/delayed_job -p dashboard -m start`
+    `#{Rails.root}/script/delayed_job --pid-dir=#{DELAYED_JOB_PID_PATH} -p 
dashboard -m start`
   end
 end
 
@@ -19,6 +20,8 @@ def process_is_dead?
   end
 end
 
-if !File.exist?(DELAYED_JOB_PID_PATH) && process_is_dead?
-  start_delayed_job
+unless Rails.env == 'test'
+  if !File.exist?(DELAYED_JOB_PID_PATH + 'delayed_job.pid') && process_is_dead?
+    start_delayed_job
+  end
 end
diff --git a/lib/tasks/import_reports.rake b/lib/tasks/import_reports.rake
index 247fe49..46b6db7 100644
--- a/lib/tasks/import_reports.rake
+++ b/lib/tasks/import_reports.rake
@@ -9,14 +9,13 @@ namespace :reports do
     plural = lambda{|str, count| str + (count != 1 ? 's' : '')}
     reports = FileList[File.join(report_dir, '**', '*.yaml')]
 
-    STDOUT.puts "Importing #{reports.size} #{plural['report', reports.size]} 
from #{report_dir}"
+    STDOUT.puts "Importing #{reports.size} #{plural['report', reports.size]} 
from #{report_dir} in the background"
 
     skipped = 0
     pbar = ProgressBar.new("Importing:", reports.size, STDOUT)
     reports.each do |report|
-      data = File.read(report)
       success = begin
-        Report.create_from_yaml(data)
+        Report.delay.create_from_yaml_file(report)
       rescue => e
         puts e
         false
@@ -28,7 +27,7 @@ namespace :reports do
 
     successes = reports.size - skipped
 
-    STDOUT.puts "#{successes} of #{reports.size} #{plural['report', 
successes]} imported"
+    STDOUT.puts "#{successes} of #{reports.size} #{plural['report', 
successes]} queued"
     STDOUT.puts "#{skipped} #{plural['report', skipped]} skipped" if skipped > 0
   end
 end
diff --git a/spec/controllers/reports_controller_spec.rb 
b/spec/controllers/reports_controller_spec.rb
index 667d307..d7945ef 100644
--- a/spec/controllers/reports_controller_spec.rb
+++ b/spec/controllers/reports_controller_spec.rb
@@ -15,12 +15,15 @@ describe ReportsController do
     describe "correctly formatted POST", :shared => true do
       it { should_not raise_error }
       it { should change(Report, :count).by(1) }
-      it { should change{ Node.find_by_name("sample_node")}.from(nil) }
+      it { should change { Node.find_by_name("sample_node") }.from(nil) }
     end
 
     describe "with a POST from Puppet 2.6.x" do
       subject do
-        lambda { post_with_body('upload', @yaml, :content_type => 
'application/x-yaml') }
+        lambda {
+          post_with_body('upload', @yaml, :content_type => 
'application/x-yaml')
+          Delayed::Worker.new.work_off
+        }
       end
 
       it_should_behave_like "correctly formatted POST"
@@ -28,7 +31,10 @@ describe ReportsController do
 
     describe "with a POST from Puppet 0.25.x" do
       subject do
-        lambda { post('upload', :report => @yaml) }
+        lambda {
+          post('upload', :report => @yaml)
+          Delayed::Worker.new.work_off
+        }
       end
 
       it_should_behave_like "correctly formatted POST"
@@ -36,7 +42,10 @@ describe ReportsController do
 
     describe "with a POST with a report inside the report parameter" do
       subject do
-        lambda { post('upload', :report => { :report => @yaml }) }
+        lambda {
+          post('upload', :report => { :report => @yaml })
+          Delayed::Worker.new.work_off
+        }
       end
 
       it_should_behave_like "correctly formatted POST"
@@ -47,9 +56,8 @@ describe ReportsController do
         post('upload', :report => "" )
       end
 
-      it "should return a 406 response and the error text" do
-        response.code.should == '406'
-        response.body.should == "ERROR! ReportsController#upload failed: The 
supplied report is in invalid format 'FalseClass', expected 
'Puppet::Transaction::Report'"
+      it "should be 200, because we queued the job" do
+        response.code.should == '200'
       end
     end
 
@@ -58,9 +66,8 @@ describe ReportsController do
         post('upload', :report => "foo bar baz bad data invalid")
       end
 
-      it "should return a 406 response and the error text" do
-        response.code.should == '406'
-        response.body.should == "ERROR! ReportsController#upload failed: The 
supplied report is in invalid format 'String', expected 
'Puppet::Transaction::Report'"
+      it "should be 200, because we queued the job" do
+        response.code.should == '200'
       end
     end
   end
diff --git a/spool/.gitignore b/spool/.gitignore
new file mode 100644
index 0000000..69c9583
--- /dev/null
+++ b/spool/.gitignore
@@ -0,0 +1,3 @@
+# This directory is used to spool YAML reports for background importing.
+# None of the content lives beyond the time to load the report.
+*
-- 
1.7.5.4

-- 
You received this message because you are subscribed to the Google Groups 
"Puppet Developers" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/puppet-dev?hl=en.

Reply via email to