Mostafa Mokhtar created IMPALA-6744:
---------------------------------------

             Summary: Inline codegend_compare_fn_ to avoid per-row memory loads and function call
                 Key: IMPALA-6744
                 URL: https://issues.apache.org/jira/browse/IMPALA-6744
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
    Affects Versions: Impala 2.13.0
            Reporter: Mostafa Mokhtar
            Assignee: Tianyi Wang
         Attachments: percentile query profile.txt

While evaluating Sort performance, I noticed that the codegen'd compare function
is not inlined, which results in a large per-row overhead.

The expected speedup is 10-15%.

{code}
  /// Returns a negative value if lhs is less than rhs, a positive value if lhs is
  /// greater than rhs, or 0 if they are equal. All exprs (ordering_exprs_lhs_ and
  /// ordering_exprs_rhs_) must have been prepared and opened before calling this,
  /// i.e. 'sort_key_exprs' in the constructor must have been opened.
  int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow* rhs) const {
    return codegend_compare_fn_ == NULL ?
        CompareInterpreted(lhs, rhs) :
        (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),
            ordering_expr_evals_rhs_.data(), lhs, rhs);
  } 
{code}

From perf:

{code}
      │    bool Sorter::TupleSorter::Less(const TupleRow* lhs, const TupleRow* 
rhs) {                                                                          
                                                                                
 ▒
  7.43 │      push   %rbp                                                       
                                                                                
                                                                                
 ▒
  3.23 │      mov    %rsp,%rbp                                                  
                                                                                
                                                                                
 ▒
  9.44 │      push   %r12                                                       
                                                                                
                                                                                
 ▒
  2.69 │      push   %rbx                                                       
                                                                                
                                                                                
 ▒
  3.89 │      mov    %rsi,%r12                                                  
                                                                                
                                                                                
 ▒
  2.98 │      mov    %rdi,%rbx                                                  
                                                                                
                                                                                
 ▒
  6.06 │      sub    $0x10,%rsp                                                 
                                                                                
                                                                                
 ◆
       │      --num_comparisons_till_free_;                                     
                                                                                
                                                                                
 ▒
       │      DCHECK_GE(num_comparisons_till_free_, 0);                         
                                                                                
                                                                                
 ▒
       │      if (UNLIKELY(num_comparisons_till_free_ == 0)) {                  
                                                                                
                                                                                
 ▒
  3.75 │      subl   $0x1,0x18(%rdi)                                            
                                                                                
                                                                                
 ▒
  9.42 │    ↓ je     58                                                         
                                                                                
                                                                                
 ▒
       │        parent_->expr_results_pool_.Clear();                            
                                                                                
                                                                                
 ▒
       │        num_comparisons_till_free_ = state_->batch_size();              
                                                                                
                                                                                
 ▒
       │      }                                                                 
                                                                                
                                                                                
 ▒
       │      return comparator_.Less(lhs, rhs);                                
                                                                                
                                                                                
 ▒
  1.09 │17:   mov    0x10(%rbx),%rdi                                            
                                                                                
                                                                                
 ▒
       │      /// Returns a negative value if lhs is less than rhs, a positive 
value if lhs is                                                                 
                                                                                
  ▒
       │      /// greater than rhs, or 0 if they are equal. All exprs 
(ordering_exprs_lhs_ and                                                        
                                                                                
           ▒
       │      /// ordering_exprs_rhs_) must have been prepared and opened 
before calling this,                                                            
                                                                                
       ▒
       │      /// i.e. 'sort_key_exprs' in the constructor must have been 
opened.                                                                         
                                                                                
       ▒
       │      int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow* 
rhs) const {                                                                    
                                                                                
    ▒
       │        return codegend_compare_fn_ == NULL ?                           
                                                                                
                                                                                
 ▒
  2.69 │      mov    0x58(%rdi),%rax                                            
                                                                                
                                                                                
 ▒
       │            CompareInterpreted(lhs, rhs) :                              
                                                                                
                                                                                
 ▒
       │            (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),    
                                                                                
                                                                                
 ▒
       │                ordering_expr_evals_rhs_.data(), lhs, rhs);             
                                                                                
                                                                                
 ▒
  5.43 │      test   %rax,%rax                                                  
                                                                                
                                                                                
 ▒
       │    ↓ je     40                                                         
                                                                                
                                                                                
 ▒
  6.85 │      mov    0x20(%rdi),%rsi                                            
                                                                                
                                                                                
 ▒
  0.86 │      mov    %rdx,%rcx                                                  
                                                                                
                                                                                
 ▒
  2.55 │      mov    0x8(%rdi),%rdi                                             
                                                                                
                                                                                
 ▒
  3.38 │      mov    %r12,%rdx                                                  
                                                                                
                                                                                
 ▒
  6.10 │    → callq  *(%rax)                                                    
                                                                                
                                                                                
 ▒
       │    }                                                                   
                                                                                
                                                                                
 ▒
  5.84 │      add    $0x10,%rsp                                                 
                                                                                
                                                                                
 ▒
       │      /// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_) must 
have been prepared                                                              
                                                                                
  ▒
       │      /// and opened before calling this.                               
                                                                                
                                                                                
 ▒
       │      /// Force inlining because it tends not to be always inlined at 
callsites, even in                                                              
                                                                                
   ▒
       │      /// hot loops.                                                    
                                                                                
                                                                                
 ▒
       │      bool ALWAYS_INLINE Less(const TupleRow* lhs, const TupleRow* rhs) 
const {                                                                         
                                                                                
 ▒
       │        return Compare(lhs, rhs) < 0;                                   
                                                                                
                                                                                
 ▒
  1.77 │      shr    $0x1f,%eax                                                 
                                                                                
                                                                                
 ▒
  7.91 │      pop    %rbx                                                       
                                                                                
                                                                                
 ▒
  4.11 │      pop    %r12                                                       
                                                                                
                                                                                
 ▒
  0.49 │      pop    %rbp                                                       
                                                                                
                                                                                
 ▒
  1.75 │    ← retq                                                              
                                                                                
                                                                                
 ▒
       │      /// i.e. 'sort_key_exprs' in the constructor must have been 
opened.                                                                         
                                                                                
       ▒
       │      int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow* 
rhs) const {                                                                    
                                                                                
    ▒
       │        return codegend_compare_fn_ == NULL ?                           
                                                                                
                                                                                
 ▒
       │            CompareInterpreted(lhs, rhs) :                              
                                                                                
                                                                                
 ▒
       │            (*codegend_compare_fn_)(ordering_expr_evals_lhs_.data(),    
                                                                                
                                                                                
 ▒
       │                ordering_expr_evals_rhs_.data(), lhs, rhs);             
                                                                                
                                                                                
 ▒
       │40:   mov    %r12,%rsi                                                  
                                                                                
                                                                                
 ▒
       │    → callq  
impala::TupleRowComparator::CompareInterpreted(impala::TupleRow const*, 
impala::TupleRow const*) const                                                  
                                                                    ▒
       │      add    $0x10,%rsp                                                 
                                                                                
                                                                                
 ▒
       │      /// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_) must 
have been prepared                                                              
                                                                                
  ▒
Press 'h' for help on key bindings
{code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to