Hello, I am having problems understanding an error valgrind gives me. I tried to boil down the program as much as possible. The original program as well as the test example both work fine, but when I link the created library to another application I get segfaults. I think that this piece of code is to blame. I ran valgrind on it and got an invalid read.
The code can be seen at https://gist.github.com/floli/d62d16ce7cabb4522e2ae7e6b3cfda43 or below. It's about 60 lines of C/C++ code. I have also attached the valgrind report below the code. The code registers a custom MPI datatype and sends it using an Isend. It does not crash or produce invalid data, but I fear that the invalid read message from valgrind hints at an existing memory corruption. But I have no idea where that could happen. OpenMPI 3.0.0 @ Arch. I am very thankful for any hints whatsoever! Florian // Compile and test with: mpicxx -std=c++11 -g -O0 mpitest.cpp && LD_PRELOAD=/usr/lib/valgrind/libmpiwrap-amd64-linux.so mpirun -n 1 valgrind --read-var-info=yes --leak-check=full ./a.out #include <vector> #include <iostream> #include <mpi.h> using namespace std; struct MPI_EventData { int size; }; void collect() { // Register MPI datatype MPI_Datatype MPI_EVENTDATA; int blocklengths[] = {1}; MPI_Aint displacements[] = {offsetof(MPI_EventData, size) }; MPI_Datatype types[] = {MPI_INT}; MPI_Type_create_struct(1, blocklengths, displacements, types, &MPI_EVENTDATA); MPI_Type_commit(&MPI_EVENTDATA); int rank, MPIsize; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &MPIsize); std::vector<MPI_Request> requests; std::vector<int> eventsPerRank(MPIsize); size_t eventsSize = 3; // each rank sends three events, invalid read happens only if eventsSize > 1 MPI_Gather(&eventsSize, 1, MPI_INT, eventsPerRank.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); std::vector<MPI_EventData> eventSendBuf; // Buffer to hold the MPI_EventData object for (int i = 0; i < eventsSize; ++i) { MPI_EventData eventdata; MPI_Request req; eventdata.size = 5; eventSendBuf.push_back(eventdata); cout << "Isending event " << i << endl; MPI_Isend(&eventSendBuf.back(), 1, MPI_EVENTDATA, 0, 0, MPI_COMM_WORLD, &req); requests.push_back(req); } if (rank == 0) { for (int i = 0; i < MPIsize; ++i) { for (int j = 0; j < eventsPerRank[i]; ++j) { MPI_EventData ev; MPI_Recv(&ev, 1, MPI_EVENTDATA, 
i, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); cout << "Received Size = " << ev.size << endl; } } } MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); MPI_Type_free(&MPI_EVENTDATA); } int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); collect(); MPI_Finalize(); } /* % mpicxx -std=c++11 -g -O0 mpitest.cpp && LD_PRELOAD=/usr/lib/valgrind/libmpiwrap-amd64-linux.so mpirun -n 1 valgrind --read-var-info=yes --leak-check=full ./a.out ==13584== Memcheck, a memory error detector ==13584== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. ==13584== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info ==13584== Command: ./a.out ==13584== valgrind MPI wrappers 13584: Active for pid 13584 valgrind MPI wrappers 13584: Try MPIWRAP_DEBUG=help for possible options ==13584== Thread 3: ==13584== Syscall param epoll_pwait(sigmask) points to unaddressable byte(s) ==13584== at 0x61A0FE6: epoll_pwait (in /usr/lib/libc-2.26.so) ==13584== by 0x677CDDC: ??? (in /usr/lib/openmpi/libopen-pal.so.40.0.0) ==13584== by 0x6780EDA: opal_libevent2022_event_base_loop (in /usr/lib/openmpi/libopen-pal.so.40.0.0) ==13584== by 0x93100CE: ??? 
(in /usr/lib/openmpi/openmpi/mca_pmix_pmix2x.so) ==13584== by 0x5E9408B: start_thread (in /usr/lib/libpthread-2.26.so) ==13584== by 0x61A0E7E: clone (in /usr/lib/libc-2.26.so) ==13584== Address 0x0 is not stack'd, malloc'd or (recently) free'd ==13584== Isending event 0 ==13584== Thread 1: ==13584== Invalid read of size 2 ==13584== at 0x4C33B20: memmove (vg_replace_strmem.c:1258) ==13584== by 0x11A7BB: MPI_EventData* std::__copy_move<true, true, std::random_access_iterator_tag>::__copy_m<MPI_EventData>(MPI_EventData const*, MPI_EventData const*, MPI_EventData*) (stl_algobase.h:368) ==13584== by 0x11A70B: MPI_EventData* std::__copy_move_a<true, MPI_EventData*, MPI_EventData*>(MPI_EventData*, MPI_EventData*, MPI_EventData*) (stl_algobase.h:386) ==13584== by 0x11A62B: MPI_EventData* std::__copy_move_a2<true, MPI_EventData*, MPI_EventData*>(MPI_EventData*, MPI_EventData*, MPI_EventData*) (stl_algobase.h:424) ==13584== by 0x11A567: MPI_EventData* std::copy<std::move_iterator<MPI_EventData*>, MPI_EventData*>(std::move_iterator<MPI_EventData*>, std::move_iterator<MPI_EventData*>, MPI_EventData*) (stl_algobase.h:456) ==13584== by 0x11A478: MPI_EventData* std::__uninitialized_copy<true>::__uninit_copy<std::move_iterator<MPI_EventData*>, MPI_EventData*>(std::move_iterator<MPI_EventData*>, std::move_iterator<MPI_EventData*>, MPI_EventData*) (stl_uninitialized.h:101) ==13584== by 0x11A306: MPI_EventData* std::uninitialized_copy<std::move_iterator<MPI_EventData*>, MPI_EventData*>(std::move_iterator<MPI_EventData*>, std::move_iterator<MPI_EventData*>, MPI_EventData*) (stl_uninitialized.h:134) ==13584== by 0x11A05B: MPI_EventData* std::__uninitialized_copy_a<std::move_iterator<MPI_EventData*>, MPI_EventData*, MPI_EventData>(std::move_iterator<MPI_EventData*>, std::move_iterator<MPI_EventData*>, MPI_EventData*, std::allocator<MPI_EventData>&) (stl_uninitialized.h:289) ==13584== by 0x119AEC: MPI_EventData* std::__uninitialized_move_if_noexcept_a<MPI_EventData*, MPI_EventData*, 
std::allocator<MPI_EventData> >(MPI_EventData*, MPI_EventData*, MPI_EventData*, std::allocator<MPI_EventData>&) (stl_uninitialized.h:312) ==13584== by 0x1190D2: void std::vector<MPI_EventData, std::allocator<MPI_EventData> >::_M_realloc_insert<MPI_EventData >const&>(__gnu_cxx::__normal_iterator<MPI_EventData*, std::vector<MPI_EventData, std::allocator<MPI_EventData> > >, MPI_EventData const&) (vector.tcc:424) ==13584== by 0x118B17: std::vector<MPI_EventData, std::allocator<MPI_EventData> >::push_back(MPI_EventData const&) (stl_vector.h:948) ==13584== by 0x113B6E: collect() (mpitest.cpp:42) ==13584== Address 0xd315ca0 is 0 bytes inside a block of size 4 alloc'd ==13584== at 0x4C2D54F: operator new(unsigned long) (vg_replace_malloc.c:334) ==13584== by 0x11A2BB: __gnu_cxx::new_allocator<MPI_EventData>::allocate(unsigned long, void const*) (new_allocator.h:111) ==13584== by 0x119FE2: std::allocator_traits<std::allocator<MPI_EventData> >::allocate(std::allocator<MPI_EventData>&, unsigned long) (alloc_traits.h:436) ==13584== by 0x119A87: std::_Vector_base<MPI_EventData, std::allocator<MPI_EventData> >::_M_allocate(unsigned long) (stl_vector.h:172) ==13584== by 0x119055: void std::vector<MPI_EventData, std::allocator<MPI_EventData> >::_M_realloc_insert<MPI_EventData >const&>(__gnu_cxx::__normal_iterator<MPI_EventData*, std::vector<MPI_EventData, std::allocator<MPI_EventData> > >, MPI_EventData const&) (vector.tcc:406) ==13584== by 0x118B17: std::vector<MPI_EventData, std::allocator<MPI_EventData> >::push_back(MPI_EventData const&) (stl_vector.h:948) ==13584== by 0x113B6E: collect() (mpitest.cpp:42) ==13584== by 0x113DEE: main (mpitest.cpp:68) ==13584== Isending event 1 Isending event 2 Received Size = 5 Received Size = 5 Received Size = 5 ==13584== ==13584== HEAP SUMMARY: ==13584== in use at exit: 1,898 bytes in 44 blocks ==13584== total heap usage: 18,037 allocs, 17,993 frees, 4,086,090 bytes allocated ==13584== ==13584== 5 bytes in 1 blocks are definitely lost in loss 
record 1 of 44 ==13584== at 0x4C2CEDF: malloc (vg_replace_malloc.c:299) ==13584== by 0x6130B9A: strdup (in /usr/lib/libc-2.26.so) ==13584== by 0x954B720: ??? ==13584== by 0x9312913: ??? ==13584== by 0x9313114: ??? ==13584== by 0x930FE31: ??? ==13584== by 0x92CA3AD: ??? ==13584== by 0x92A3ED7: ??? ==13584== by 0x867C876: ??? ==13584== by 0x647BBEA: orte_init (in /usr/lib/openmpi/libopen-rte.so.40.0.0) ==13584== by 0x52E3F77: ompi_mpi_init (in /usr/lib/openmpi/libmpi.so.40.0.0) ==13584== by 0x530B78D: PMPI_Init (in /usr/lib/openmpi/libmpi.so.40.0.0) ==13584== ==13584== 12 bytes in 1 blocks are definitely lost in loss record 2 of 44 ==13584== at 0x4C2CEDF: malloc (vg_replace_malloc.c:299) ==13584== by 0x6130B9A: strdup (in /usr/lib/libc-2.26.so) ==13584== by 0x954FA92: ??? ==13584== by 0x9323720: ??? ==13584== by 0x92CA651: ??? ==13584== by 0x92A3ED7: ??? ==13584== by 0x867C876: ??? ==13584== by 0x647BBEA: orte_init (in /usr/lib/openmpi/libopen-rte.so.40.0.0) ==13584== by 0x52E3F77: ompi_mpi_init (in /usr/lib/openmpi/libmpi.so.40.0.0) ==13584== by 0x530B78D: PMPI_Init (in /usr/lib/openmpi/libmpi.so.40.0.0) ==13584== by 0x4E4A2C0: PMPI_Init (libmpiwrap.c:2271) ==13584== by 0x113DE9: main (mpitest.cpp:66) ==13584== ==13584== 35 bytes in 1 blocks are definitely lost in loss record 24 of 44 ==13584== at 0x4C2CEDF: malloc (vg_replace_malloc.c:299) ==13584== by 0x6130B9A: strdup (in /usr/lib/libc-2.26.so) ==13584== by 0x954B6B8: ??? ==13584== by 0x9312913: ??? ==13584== by 0x9313114: ??? ==13584== by 0x930FE31: ??? ==13584== by 0x92CA3AD: ??? ==13584== by 0x92A3ED7: ??? ==13584== by 0x867C876: ??? 
==13584== by 0x647BBEA: orte_init (in /usr/lib/openmpi/libopen-rte.so.40.0.0) ==13584== by 0x52E3F77: ompi_mpi_init (in /usr/lib/openmpi/libmpi.so.40.0.0) ==13584== by 0x530B78D: PMPI_Init (in /usr/lib/openmpi/libmpi.so.40.0.0) ==13584== ==13584== 1,608 (320 direct, 1,288 indirect) bytes in 1 blocks are definitely lost in loss record 44 of 44 ==13584== at 0x4C2F0FF: realloc (vg_replace_malloc.c:785) ==13584== by 0x92B1D7E: ??? ==13584== by 0x92B9FFE: ??? ==13584== by 0x92BB03E: ??? ==13584== by 0x6781AD8: opal_libevent2022_event_base_loop (in /usr/lib/openmpi/libopen-pal.so.40.0.0) ==13584== by 0x93100CE: ??? ==13584== by 0x5E9408B: start_thread (in /usr/lib/libpthread-2.26.so) ==13584== by 0x61A0E7E: clone (in /usr/lib/libc-2.26.so) ==13584== ==13584== LEAK SUMMARY: ==13584== definitely lost: 372 bytes in 4 blocks ==13584== indirectly lost: 1,288 bytes in 34 blocks ==13584== possibly lost: 0 bytes in 0 blocks ==13584== still reachable: 238 bytes in 6 blocks ==13584== suppressed: 0 bytes in 0 blocks ==13584== Reachable blocks (those to which a pointer was found) are not shown. ==13584== To see them, rerun with: --leak-check=full --show-leak-kinds=all ==13584== ==13584== For counts of detected and suppressed errors, rerun with: -v ==13584== ERROR SUMMARY: 44 errors from 6 contexts (suppressed: 0 from 0) */ _______________________________________________ users mailing list users@lists.open-mpi.org https://lists.open-mpi.org/mailman/listinfo/users