[
https://issues.apache.org/jira/browse/MAPREDUCE-6346?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14519353#comment-14519353
]
Tony Reix commented on MAPREDUCE-6346:
--------------------------------------
I've started to analyze the crash and to compare the run on PPC64LE with the
run on x86_64.
However, I do not understand what the test is meant to do; knowing that would help.
How to reproduce :
cd
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask
mvn -l mvn.Test.OpenJDK.KVTest7.res test -Pnative,src -Drequire.snappy
-Dmaven.test.failure.ignore=true -Dsurefire.heap=-Xmx2048m -fn -X
-Dtest=org.apache.hadoop.mapred.nativetask.kvtest.KVTest
Maven/Java environment:
$ mvn -version
Apache Maven 3.2.5 (12a6b3acb947671f09b81f49094c53f426d8cea1;
2014-12-14T11:29:23-06:00)
Maven home: /opt/apache-maven-3.2.5
Java version: 1.7.0_79, vendor: Oracle Corporation
Java home: /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.79-2.5.5.1.ael7b_1.ppc64le/jre
Default locale: en_US, platform encoding: ISO-8859-1
OS name: "linux", version: "3.10.0-229.ael7b.ppc64le", arch: "ppc64le", family:
"unix"
Stack is:
Core was generated by `java -Xmx4096m -XX:MaxPermSize=768m
-XX:+HeapDumpOnOutOfMemoryError -jar /home/'.
Program terminated with signal 6, Aborted.
#0 0x00003fffa55ce6b8 in raise () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install
glib2-2.40.0-4.ael7b.ppc64le glibc-2.17-78.ael7b.ppc64le
libffi-3.0.13-16.ael7b.ppc64le libgcc-4.8.3-9.ael7b.ppc64le
libselinux-2.2.2-6.ael7b.ppc64le libstdc++-4.8.3-9.ael7b.ppc64le
pcre-8.32-14.ael7b.ppc64le snappy-1.1.0-3.ael7b.ppc64le
xz-libs-5.1.2-9alpha.ael7b.ppc64le zlib-1.2.7-14.ael7b.ppc64le
(gdb) bt
#0 0x00003fffa55ce6b8 in raise () from /lib64/libc.so.6
#1 0x00003fffa55d098c in abort () from /lib64/libc.so.6
#2 0x00003fffa524edcc in os::abort (dump_core=<optimized out>) at
/usr/src/debug/java-1.7.0-openjdk-1.7.0.79-2.5.5.1.ael7b_1.ppc64le/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:1630
#3 0x00003fffa542977c in VMError::report_and_die (this=0x3fff84f9c108) at
/usr/src/debug/java-1.7.0-openjdk-1.7.0.79-2.5.5.1.ael7b_1.ppc64le/openjdk/hotspot/src/share/vm/utilities/vmError.cpp:1073
#4 0x00003fffa525c264 in JVM_handle_linux_signal (sig=<optimized out>,
info=0x3fff84f9d068, ucVoid=0x3fff84f9c2f0, abort_if_unrecognized=<optimized
out>)
at
/usr/src/debug/java-1.7.0-openjdk-1.7.0.79-2.5.5.1.ael7b_1.ppc64le/openjdk/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp:437
#5 0x00003fffa524b7dc in signalHandler (sig=<optimized out>, info=<optimized
out>, uc=<optimized out>)
at
/usr/src/debug/java-1.7.0-openjdk-1.7.0.79-2.5.5.1.ael7b_1.ppc64le/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:4361
#6 <signal handler called>
#7 NativeTask::WritableUtils::ReadVLongInner (pos=0x3fff89dd0000 <Address
0x3fff89dd0000 out of bounds>, len=@0x3fff84f9d310: 4294967184)
at
/home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/util/WritableUtils.cc:68
value = (value << 8) | *(uint8_t*)pos;
#8 0x00003fff853d7ebc in ReadVLong (len=<optimized out>, pos=<optimized out>)
at
/home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/util/WritableUtils.h:64
return ReadVLongInner(pos, len);
#9 ReadVInt (len=<optimized out>, pos=<optimized out>)
at
/home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/util/WritableUtils.h:69
inline static int32_t ReadVInt(const char * pos, uint32_t & len) {
return (int32_t)ReadVLong(pos, len);
#10 nextKey (keyLen=@0x3fff89b75518: 0, this=0x3fff89b8dbe0)
at
/home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/IFile.h:74
const char * nextKey(uint32_t & keyLen) {
int64_t t1 = _reader.readVLong();
int64_t t2 = _reader.readVLong();
if (t1 == -1) {
return NULL;
}
const char * kvbuff = _reader.get((uint32_t)(t1 + t2));
uint32_t len;
switch (_kType) {
case TextType:
keyLen = WritableUtils::ReadVInt(kvbuff, len); <---------------
#11 NativeTask::IFileMergeEntry::next (this=0x3fff89b75500)
at
/home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/Merge.h:211
virtual bool next() {
_key = _reader->nextKey(_keyLength); <---------------
#12 0x00003fff853d7774 in NativeTask::Merger::initHeap (this=0x3fff89b8db40)
at
/home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/Merge.cc:83
....
Traces have been added in:
- src/main/native/src/lib/IFile.h
const char * nextKey(uint32_t & keyLen) {
int64_t t1 = _reader.readVLong();
int64_t t2 = _reader.readVLong();
if (t1 == -1) {
return NULL;
}
const char * kvbuff = _reader.get((uint32_t)(t1 + t2));
uint32_t len;
switch (_kType) {
case TextType:
std::cout << "nextKey 0 t1: " << t1 << std::endl ;
std::cout << "nextKey 0 t2: " << t2 << std::endl ;
std::cout << "nextKey 1 t12: " << t1+t2 << std::endl ;
std::cout << "nextKey 2 t12: " << (uint32_t)(t1+t2) << std::endl ;
keyLen = WritableUtils::ReadVInt(kvbuff, len);
break;
- src/main/native/src/util/WritableUtils.cc
int64_t WritableUtils::ReadVLongInner(const char * pos, uint32_t & len) {
std::cout << "0 &pos: " << &pos << std::endl ;
std::cout << "1 pos: " << pos << std::endl ;
std::cout << "2 *pos: " << *pos << std::endl ;
std::cout << "2 (hex)*pos: " << std::hex << (int) *pos << std::dec << std::endl
;
std::cout << "2 (int)*pos: " << (int)*pos << std::endl ;
std::cout << "2 (int64_t)*pos: " << (int64_t)*pos << std::endl ;
bool neg = *pos < -120;
std::cout << "3 neg: " << neg << std::endl ;
len = neg ? (-119 - *pos) : (-111 - *pos);
std::cout << "4 len: " << len << std::endl ;
const char * end = pos + len;
std::cout << "5 end: " << end << std::endl ;
int64_t value = 0;
while (++pos < end) {
value = (value << 8) | *(uint8_t*)pos;
}
return neg ? (value ^ -1LL) : value;
}
With the above added traces, output is:
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
0 &pos: 0x3fff80e2d2b0
1 pos: ^A ��X~\~Egv^B^H^...........8d\x19\xb2P~\
2 *pos: ^A
2 (int)*pos: 1
2 (int64_t)*pos: 1
3 neg: 0
4 len: 4294967184
On x86_64, I have (with same traces):
......
15/04/29 14:28:49 INFO Mid-spill: { id: 4, collect: 183 ms, in-memory sort: 13
ms, in-memory records: 48288, merge&spill: 49 ms, uncompressed size: 5030953,
real size: 3742238 path:
/tmp/hadoop-reixt/mapred/local/localRunner/reixt/jobcache/job_local2085218591_0015/attempt_local2085218591_0015_m_000000_0/output/spill4.out
}
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 173
nextKey 0 t2: 8
nextKey 1 t12: 181
nextKey 2 t12: 181
0 &pos: 0x7fd5186f0098
1 pos: ?? "`\xcc\xe0\xa4\x85........
2 *pos: ?
2 (hex)*pos: ffffff8f
2 (int)*pos: -113
2 (int64_t)*pos: -113
3 neg: 0
4 len: 2
5 end: "`\xcc\xe0\xa4\x85\xc4r\xb1........
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 2
nextKey 0 t2: 8
nextKey 1 t12: 10
nextKey 2 t12: 10
nextKey 0 t1: 166
nextKey 0 t2: 8
nextKey 1 t12: 174
nextKey 2 t12: 174
0 &pos: 0x7fd5186f0098
1 pos: ?? \x1a\x16c\x17\xca\x83\xdeN\xd0w9;C0........
2 *pos: ?
2 (hex)*pos: ffffff8f
2 (int)*pos: -113
2 (int64_t)*pos: -113
3 neg: 0
4 len: 2
5 end: \x1a\x16c\x17\xca\x83\xdeN\xd0w9;C0........
So, on PPC64LE, *pos is 1 instead of -113, generating a "negative" very big
length (2^32-1)-111 = 4294967184 instead of (-111 - *pos) = (-111 - (-113)) =
2.
So, why is *pos = 1 on PPC64LE instead of *pos = -113 as on x86_64?
I need help adding more tracing.
Interestingly, launching the Java command shown by "Forking ..." does not
generate the crash.
The crash only appears when using "mvn".
So adding traces seems to me the easiest way to get more data.
> mapred.nativetask.kvtest.KVTest crashes on PPC64LE
> --------------------------------------------------
>
> Key: MAPREDUCE-6346
> URL: https://issues.apache.org/jira/browse/MAPREDUCE-6346
> Project: Hadoop Map/Reduce
> Issue Type: Bug
> Affects Versions: 2.7.1
> Environment: RHEL 7.1 - PPC64 LE - OpenJDK
> rhel-2.5.5.1.ael7b_1-ppc64le u79-b14
> Reporter: Tony Reix
>
> Test org.apache.hadoop.mapred.nativetask.kvtest.KVTest (and 5 or 6 other
> tests) crashes on PPC64LE .
> ....
> 15/04/28 10:46:06 INFO Mid-spill: { id: 4, collect: 245 ms, in-memory sort:
> 32 ms, in-memory records: 48202, merge&spill: 80 ms, uncompressed size:
> 5031451, real size: 3739319 path:
> /tmp/hadoop-reixt/mapred/local/localRunner/reixt/jobcache/job_local408221154_0008/attempt_local408221154_0008_m_000000_0/output/spill4.out
> }
> # A fatal error has been detected by the Java Runtime Environment:
> #
> # SIGSEGV (0xb) at pc=0x00003fff6c7d8e50, pid=945, tid=70366264881616
> #
> # JRE version: OpenJDK Runtime Environment (7.0_79-b14) (build
> 1.7.0_79-mockbuild_2015_04_10_10_48-b00)
> # Java VM: OpenJDK 64-Bit Server VM (24.79-b02 mixed mode linux-ppc64
> compressed oops)
> # Derivative: IcedTea 2.5.5
> # Distribution: Built on Red Hat Enterprise Linux Server release 7.1 (Maipo)
> (Fri Apr 10 10:48:01 EDT 2015)
> # Problematic frame:
> # C [libnativetask.so.1.0.0+0x58e50]
> NativeTask::WritableUtils::ReadVLongInner(char const*, unsigned int&)+0x40
> #
> # Core dump written. Default location:
> /home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/core
> or core.945
> #
> # An error report file with more information is saved as:
> # /tmp/jvm-945/hs_error.log
> #
> # If you would like to submit a bug report, please include
> # instructions on how to reproduce the bug and visit:
> # http://icedtea.classpath.org/bugzilla
> # The crash happened outside the Java Virtual Machine in native code.
> # See problematic frame for where to report the bug.
> #
> /bin/sh: line 1: 945 Aborted (core dumped)
> /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.79-2.5.5.1.ael7b_1.ppc64le/jre/bin/java
> -Xmx4096m -XX:MaxPermSize=768m -XX:+HeapDumpOnOutOfMemoryError -jar
> /home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/target/surefire/surefirebooter9078773752877532263.jar
>
> /home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/target/surefire/surefire4138802116387705281tmp
>
> /home/reixt/HADOOP-2.7.0/hadoop-FromApache-Trunk-201504241115/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/target/surefire/surefire_01525011254551870798tmp
> /tmp/jvm-945/hs_error.log :
> # C [libnativetask.so.1.0.0+0x58e50]
> NativeTask::WritableUtils::ReadVLongInner(char const*, unsigned int&)+0x40
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)