Michael Ho created IMPALA-8299:
----------------------------------

             Summary: GroupingAggregator::Partition::Close() may access an 
uninitialized hash table
                 Key: IMPALA-8299
                 URL: https://issues.apache.org/jira/browse/IMPALA-8299
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
    Affects Versions: Impala 3.1.0, Impala 3.2.0
            Reporter: Michael Ho
            Assignee: Thomas Tauber-Marshall


On the rare occasion that {{Suballocator::Allocate()}} fails in 
{{HashTable::Init()}}, {{GroupingAggregator::Partition::Close()}} may 
access an uninitialized hash table, leading to the crash below:

{noformat}
#4  0x00007f5413a1268f in JVM_handle_linux_signal () from 
./sysroot/usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#5  0x00007f5413a08be3 in signalHandler(int, siginfo*, void*) () from 
./sysroot/usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#6  <signal handler called>
#7  0x00000000023c5c00 in impala::HashTable::NextFilledBucket (this=0x1a1fa000, 
bucket_idx=0x7f533cfc73d0, node=0x7f533cfc73c8)
    at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/hash-table.inline.h:185
#8  0x000000000244b639 in impala::HashTable::Begin (this=0x1a1fa000, 
ctx=0x15445e00) at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/hash-table.inline.h:163
#9  0x0000000002457c69 in impala::GroupingAggregator::Partition::Close 
(this=0x133ef3e0, finalize_rows=true)
    at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/grouping-aggregator-partition.cc:207
#10 0x0000000002448f26 in impala::GroupingAggregator::ClosePartitions 
(this=0x1327aa00) at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/grouping-aggregator.cc:939
#11 0x0000000002443622 in impala::GroupingAggregator::Close (this=0x1327aa00, 
state=0x1bf5f180) at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/grouping-aggregator.cc:386
#12 0x0000000002412ce4 in impala::AggregationNode::Close (this=0x1346f600, 
state=0x1bf5f180) at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/aggregation-node.cc:139
#13 0x000000000242a69f in impala::BlockingJoinNode::ProcessBuildInputAsync 
(this=0xb466480, state=0x1bf5f180, build_sink=0xf35f600, status=0x7f53402ddb20)
    at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/blocking-join-node.cc:173
#14 0x000000000242a865 in 
impala::BlockingJoinNode::<lambda()>::operator()(void) const 
(__closure=0x21c6fd80) at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/blocking-join-node.cc:212
#15 0x000000000242c4d5 in 
boost::detail::function::void_function_obj_invoker0<impala::BlockingJoinNode::ProcessBuildInputAndOpenProbe(impala::RuntimeState*,
 impala::DataSink*)::<lambda()>, 
void>::invoke(boost::detail::function::function_buffer &) 
(function_obj_ptr=...) at 
/data/jenkins/workspace/impala-private-parameterized/Impala-Toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:153
#16 0x0000000001d7eb4e in boost::function0<void>::operator() 
(this=0x7f533cfc7ba0) at 
/data/jenkins/workspace/impala-private-parameterized/Impala-Toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
#17 0x000000000224f3d1 in impala::Thread::SuperviseThread(std::string const&, 
std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, 
impala::Promise<long, (impala::PromiseMode)0>*) (name=...,
    category=..., functor=..., parent_thread_info=0x7f53402de850, 
thread_started=0x7f53402dd7d0) at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/util/thread.cc:359
#18 0x0000000002257755 in boost::_bi::list5<boost::_bi::value<std::string>, 
boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, 
boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, 
(impala::PromiseMode)0>*> >::operator()<void (*)(std::string 
const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo 
const*, impala::Promise<long, (impala::PromiseMode)0>*), 
boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, 
std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, 
impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) (
    this=0x14cf5fc0,
    f=@0x14cf5fb8: 0x224f06a <impala::Thread::SuperviseThread(std::string 
const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo 
const*, impala::Promise<long, (impala::PromiseMode)0>*)>, a=...)
    at 
/data/jenkins/workspace/impala-private-parameterized/Impala-Toolchain/boost-1.57.0-p3/include/boost/bind/bind.hpp:525

(gdb) f 7
#7  0x00000000023c5c00 in impala::HashTable::NextFilledBucket (this=0x1a1fa000, 
bucket_idx=0x7f533cfc73d0, node=0x7f533cfc73c8)
    at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/hash-table.inline.h:185
(gdb) p this->buckets_
$14 = (impala::HashTable::Bucket *) 0x0

(gdb) f 13
#13 0x000000000242a69f in impala::BlockingJoinNode::ProcessBuildInputAsync 
(this=0xb466480, state=0x1bf5f180, build_sink=0xf35f600, status=0x7f53402ddb20)
    at 
/data/jenkins/workspace/impala-private-parameterized/repos/Impala/be/src/exec/blocking-join-node.cc:173
(gdb) p *status->msg_
$15 = {error_ = impala::TErrorCode::SCRATCH_ALLOCATION_FAILED, message_ = 
{static npos = <optimized out>, _M_dataplus = {<std::allocator<char>> = 
{<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>},
      _M_p = 0x14091a98 "Could not create files in any configured scratch 
directories (--scratch_dirs=/tmp/impala-scratch) on backend 
'impala-ec2-centos74-m5-4xlarge-ondemand-181d.vpc.cloudera.com:22002'. 512.00 
KB of scratch"...}}, details_ = {<std::_Vector_base<std::basic_string<char, 
std::char_traits<char>, std::allocator<char> >, 
std::allocator<std::basic_string<char, std::char_traits<char>, 
std::allocator<char> > > >> = {
      _M_impl = {<std::allocator<std::basic_string<char, 
std::char_traits<char>, std::allocator<char> > >> = 
{<__gnu_cxx::new_allocator<std::basic_string<char, std::char_traits<char>, 
std::allocator<char> > >> = {<No data fields>}, <No data fields>}, _M_start = 
0x14fecaa8, _M_finish = 0x14fecab0, _M_end_of_storage = 0x14fecab0}}, <No data 
fields>}}
{noformat}

In particular, we returned early in {{HashTable::Init()}} when 
{{allocator_->Allocate}} failed:
{noformat}
Status HashTable::Init(bool* got_memory) {
  int64_t buckets_byte_size = num_buckets_ * sizeof(Bucket);
  RETURN_IF_ERROR(allocator_->Allocate(buckets_byte_size, 
&bucket_allocation_)); <<----
  if (bucket_allocation_ == nullptr) {
    num_buckets_ = 0;
    *got_memory = false;
    return Status::OK();
  }
  buckets_ = reinterpret_cast<Bucket*>(bucket_allocation_->data());
  memset(buckets_, 0, buckets_byte_size);
  *got_memory = true;
  return Status::OK();
}
{noformat}

In {{GroupingAggregator::Partition::Close()}}, we tried to access the 
uninitialized hash table by calling {{HashTable::Begin()}}. We may need to 
either handle the failure case better in 
{{GroupingAggregator::Partition::InitHashTable()}} or add a boolean 
to indicate whether a hash table is initialized. It may be worth double-checking 
whether other uses of hash tables suffer from the same problem.

{noformat}
void GroupingAggregator::Partition::Close(bool finalize_rows) {
  if (is_closed) return;
  is_closed = true;
  if (aggregated_row_stream.get() != nullptr) {
    if (finalize_rows && hash_tbl.get() != nullptr) {
      // We need to walk all the rows and Finalize them here so the UDA gets a chance
      // to cleanup. If the hash table is gone (meaning this was spilled), the rows
      // should have been finalized/serialized in Spill().
      parent->CleanupHashTbl(agg_fn_evals, hash_tbl->Begin(parent->ht_ctx_.get())); <<---
    }
{noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to