On Sun, May 24, 2009 at 7:27 PM, Mateusz Berezecki <[email protected]> wrote:
> Hi list,
>
> I'll share some more details once I know more myself.
So the problem seems to be that the RangeServer is stuck in its
do_maintenance activity, waiting for a reply from the DfsBroker.
Thread 2 (Thread 0x16f4b90 (LWP 4404)):
#0 0x007ac402 in __kernel_vsyscall ()
#1 0x002fc145 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/nosegneg/libpthread.so.0
#2 0x08243aee in
boost::condition_variable_any::wait<boost::unique_lock<boost::mutex> >
(this=0x16f21b4, m=@0x16f20c8)
at /usr/local/include/boost/thread/pthread/condition_variable.hpp:84
#3 0x083b77cc in
Hypertable::DispatchHandlerSynchronizer::wait_for_reply
(this=0x16f216c, event_ptr=@0x16f2164)
at
/home/mateusz/hypertable/src/cc/AsyncComm/DispatchHandlerSynchronizer.cc:60
#4 0x082e925b in Hypertable::DfsBroker::Client::remove
(this=0x9613a88, name=@0x16f22ec, force=true) at
/home/mateusz/hypertable/src/cc/DfsBroker/Lib/Client.cc:355
#5 0x082fd551 in Hypertable::CommitLog::purge (this=0x964a118,
revision=9223372036854775807) at
/home/mateusz/hypertable/src/cc/Hypertable/Lib/CommitLog.cc:238
#6 0x082918ef in Hypertable::MaintenanceScheduler::schedule
(this=0x9628800) at
/home/mateusz/hypertable/src/cc/Hypertable/RangeServer/MaintenanceScheduler.cc:113
#7 0x0824b86e in Hypertable::RangeServer::do_maintenance
(this=0x9617010) at
/home/mateusz/hypertable/src/cc/Hypertable/RangeServer/RangeServer.cc:1987
#8 0x08289d66 in Hypertable::TimerHandler::handle (this=0x9629080,
event_ptr=@0x16f366c) at
/home/mateusz/hypertable/src/cc/Hypertable/RangeServer/TimerHandler.cc:118
#9 0x083cc4e0 in Hypertable::Reactor::handle_timeouts
(this=0x9612020, next_timeout=@0x16f4300) at
/home/mateusz/hypertable/src/cc/AsyncComm/Reactor.cc:188
#10 0x083d2b7b in Hypertable::ReactorRunner::operator()
(this=0x961252c) at
/home/mateusz/hypertable/src/cc/AsyncComm/ReactorRunner.cc:103
#11 0x083d1ef0 in
boost::detail::thread_data<Hypertable::ReactorRunner>::run
(this=0x9612470) at
/usr/local/include/boost/thread/detail/thread.hpp:56
#12 0x08410f89 in thread_proxy (param=0x9612470) at
libs/thread/src/pthread/thread.cpp:130
#13 0x002f8545 in start_thread () from /lib/tls/i686/nosegneg/libpthread.so.0
#14 0x004e0a8e in clone () from /lib/tls/i686/nosegneg/libc.so.6
In the above stack trace everything looks normal (?). The code where
the thread is blocked is shown below:
346 void
347 Client::remove(const String &name, bool force) {
348 DispatchHandlerSynchronizer sync_handler;
349 EventPtr event_ptr;
350 CommBufPtr cbp(m_protocol.create_remove_request(name));
351
(gdb)
352 try {
353 send_message(cbp, &sync_handler);
354
355 if (!sync_handler.wait_for_reply(event_ptr)) { <--- the problem is
here
356 int error = Protocol::response_code(event_ptr.get());
357
358 if (!force || error != Error::DFSBROKER_FILE_NOT_FOUND)
359 HT_THROW(error,
m_protocol.string_format_message(event_ptr).c_str());
360 }
361 }
(gdb)
362 catch (Exception &e) {
363 HT_THROW2F(e.code(), e, "Error removing DFS file: %s",
name.c_str());
364 }
365 }
The name argument of Client::remove function is
(gdb) p name
$3 = (const String &) @0x16f22ec: {static npos = 4294967295,
_M_dataplus = {<std::allocator<char>> =
{<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data
fields>},
_M_p = 0x962e3d4 "/hypertable/servers/10.249.3.79_38060/log/user/1"}}
(gdb)
The local broker log, however, shows that no such request arrived at all:
CPU cores count=1
DfsBroker.Host=localhost
DfsBroker.Local.Port=38030
DfsBroker.Local.Reactors=1
DfsBroker.Local.Root=fs/local
DfsBroker.Port=38030
HdfsBroker.Port=38030
HdfsBroker.Workers=20
HdfsBroker.fs.default.name=hdfs://localhost:9000
Hyperspace.GracePeriod=200000
Hyperspace.KeepAlive.Interval=30000
Hyperspace.Lease.Interval=1000000
Hyperspace.Master.Dir=hyperspace
Hyperspace.Master.Host=localhost
Hyperspace.Master.Port=38040
Hyperspace.Master.Workers=20
Hypertable.Master.Host=localhost
Hypertable.Master.Port=38050
Hypertable.Master.Workers=20
Hypertable.RangeServer.MemoryLimit.Percentage=50
Hypertable.RangeServer.Port=38060
Hypertable.Request.Timeout=180000
Hypertable.Verbose=true
ThriftBroker.PasswordHash=util
ThriftBroker.Port=38080
ThriftBroker.User=util
pidfile=/opt/hypertable/0.9.2.3/run/DfsBroker.local.pid
port=38030
reactors=1
root=fs/local
timeout=180000
verbose=true
1243184997 INFO localBroker :
(/home/mateusz/hypertable/src/cc/AsyncComm/IOHandler.h:86) Event:
type=CONNECTION_ESTABLISHED from=127.0.0.1:48075
1243185014 INFO localBroker :
(/home/mateusz/hypertable/src/cc/AsyncComm/IOHandler.h:86) Event:
type=CONNECTION_ESTABLISHED from=127.0.0.1:48082
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/AsyncComm/IOHandler.h:86) Event:
type=CONNECTION_ESTABLISHED from=127.0.0.1:48086
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/range_txn/0.log ) = 8
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/root/0 ) = 9
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.h:49)
close( /hypertable/servers/10.249.3.79_38060/log/root/0 , 9 )
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:146)
create( /hypertable/servers/10.249.3.79_38060/log/root/1 ) = 9
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/metadata/0 ) = 10
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.h:49)
close( /hypertable/servers/10.249.3.79_38060/log/metadata/0 , 10 )
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/metadata/1 ) = 10
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.h:49)
close( /hypertable/servers/10.249.3.79_38060/log/metadata/1 , 10 )
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:146)
create( /hypertable/servers/10.249.3.79_38060/log/metadata/2 ) = 10
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/user/1 ) = 11
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.h:49)
close( /hypertable/servers/10.249.3.79_38060/log/user/1 , 11 )
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/user/2 ) = 11
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.h:49)
close( /hypertable/servers/10.249.3.79_38060/log/user/2 , 11 )
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:146)
create( /hypertable/servers/10.249.3.79_38060/log/user/3 ) = 11
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:146)
create( /hypertable/servers/10.249.3.79_38060/log/range_txn/0.log.tmp
) = 12
1243185029 INFO localBroker :
(/home/mateusz/hypertable/src/cc/DfsBroker/local/LocalBroker.cc:98)
open( /hypertable/servers/10.249.3.79_38060/log/range_txn/0.log ) = 13
I have yet to check whether this is a hardware or a software problem.
Mateusz
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"Hypertable Development" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/hypertable-dev?hl=en
-~----------~----~----~----~------~----~------~--~---