Hi Hoang, Ok let us only focus on core dump , so solution should be, adding section to checkpoint will still fail and let us change LOG to possible error case of SHM deficiency and return error to CPA .
Cpnd will NOT check the true size of shared memory for each write call before writing, this will degrade the performance of CPSV. Can you please re-send the patch V2 with fulfilling above , and change the fix description as well. -AVM On 11/25/2016 4:21 PM, Anders Widell wrote: > Yes, the problem with setting OSAF_CKPT_SHM_ALLOC_GUARANTEE is that > the memory consumption will increase. Therefore it is not backwards > compatible and thus not possible to do as a bug-fix. > > regards, > > Anders Widell > > > On 11/25/2016 10:46 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> I think this problem is #1712, problem occur when >> OSAF_CKPT_SHM_ALLOC_GUARANTEE is not set. >> >> My thinking is that because we provide 2 mode (guarantee or not) so we >> should making sure no coredump happened. >> Btw, because this is out of my scope of decide, I would like to ask >> Anders >> Widell about it. >> >> -----Original Message----- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Friday, November 25, 2016 4:06 PM >> To: Hoang Vo <hoang.m...@dektech.com.au>; anders.wid...@ericsson.com >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: ensure shared memory size before >> writing >> [#2202] >> >> Hi Hoang, >> >> Is this issue coming with enabling #1712 feature ? >> >> Exporting `OSAF_CKPT_SHM_ALLOC_GUARANTEE=1` will provide >> guaranteed CPSV >> service SHM issue and this is addressed broadly all SHM memory issue. >> >> So I don't think we required API level changes. ( may we need to >> document >> the usage of Exporting `OSAF_CKPT_SHM_ALLOC_GUARANTEE=1` ) >> >> #1712 description >> ============================================================================ >> >> == >> summary: leap: provide ensured disk space option for shm_open >> request [1712] >> >> description: >> leap: provide ensured disk space option for shm_open request [1712] >> Provided >> ensured disk space is allocated for NCS_OS_POSIX_SHM_REQ_OPEN request >> using >> posix_fallocate() so that application such as CPSV subsequent writes to >> bytes in the specified range are guaranteed not to fail because of >> lack of >> disk space. >> >> Updated the Opensaf services according to new options based on >> requirements. >> >> Cpsv service uses the ensured disk space option based on >> OSAF_CKPT_SHM_ALLOC_GUARANTEE environment variable ,so if user >> exports as >> OSAF_CKPT_SHM_ALLOC_GUARANTEE=1 (true) cpsv provided ensured disk space. >> ============================================================================ >> >> == >> >> -AVM >> >> >> On 11/23/2016 3:28 PM, Hoang Vo wrote: >>> osaf/libs/core/include/ncs_osprm.h | 9 +++++++++ >>> osaf/libs/core/leap/os_defs.c | 19 +++++++++++++++++-- >>> osaf/services/saf/cpsv/cpnd/cpnd_proc.c | 16 ++++++++++++++++ >>> 3 files changed, 42 insertions(+), 2 deletions(-) >>> >>> >>> problem: when checkpoint service init without shared memory size >>> guaranteed works in high memory load, core dump occur while adding >>> section >> to checkpoint. >>> solution: check the true size of shared memory before writing to it. >>> >>> diff --git a/osaf/libs/core/include/ncs_osprm.h >>> b/osaf/libs/core/include/ncs_osprm.h >>> --- a/osaf/libs/core/include/ncs_osprm.h >>> +++ b/osaf/libs/core/include/ncs_osprm.h >>> @@ -557,6 +557,7 @@ typedef enum { >>> NCS_OS_POSIX_SHM_REQ_UNLINK, /* unlink is shm_unlink */ >>> NCS_OS_POSIX_SHM_REQ_READ, >>> NCS_OS_POSIX_SHM_REQ_WRITE, >>> + NCS_OS_POSIX_SHM_REQ_STATS, >>> NCS_OS_POSIX_SHM_REQ_MAX >>> } NCS_OS_POSIX_SHM_REQ_TYPE; >>> typedef struct ncs_os_posix_shm_req_open_info_tag { @@ -598,6 >>> +599,13 @@ typedef struct ncs_os_posix_shm_req_writ >>> uint64_t i_offset; >>> } NCS_OS_POSIX_SHM_REQ_WRITE_INFO; >>> +typedef struct ncs_os_posix_shm_req_stats_info_tag { >>> + uint32_t i_hdl; >>> + int32_t i_fd; >>> + bool ensures_space; >>> + void *o_addr; >>> +} NCS_OS_POSIX_SHM_REQ_STATS_INFO; >>> + >>> typedef struct ncs_shm_req_info { >>> NCS_OS_POSIX_SHM_REQ_TYPE type; >>> @@ -607,6 +615,7 @@ typedef struct ncs_shm_req_info { >>> NCS_OS_POSIX_SHM_REQ_UNLINK_INFO unlink; >>> NCS_OS_POSIX_SHM_REQ_READ_INFO read; >>> NCS_OS_POSIX_SHM_REQ_WRITE_INFO write; >>> + NCS_OS_POSIX_SHM_REQ_STATS_INFO stats; >>> } info; >>> } NCS_OS_POSIX_SHM_REQ_INFO; >>> diff --git a/osaf/libs/core/leap/os_defs.c >>> b/osaf/libs/core/leap/os_defs.c >>> --- a/osaf/libs/core/leap/os_defs.c >>> +++ b/osaf/libs/core/leap/os_defs.c >>> @@ -799,9 +799,9 @@ uint32_t ncs_os_posix_shm(NCS_OS_POSIX_S >>> } >>> } else { >>> if (ftruncate(req->info.open.o_fd, (off_t) >> shm_size /* off_t == long */ ) < 0) { >>> - printf("ftruncate failed with errno >> value %d \n", errno); >>> + LOG_WA("ftruncate failed with errno >> value %d \n", errno); >>> return NCSCC_RC_FAILURE; >>> - } >>> + } >>> } >>> uint32_t prot_flag = >> ncs_shm_prot_flags(req->info.open.i_flags); >>> @@ -859,6 +859,21 @@ uint32_t ncs_os_posix_shm(NCS_OS_POSIX_S >>> req->info.write.i_write_size); >>> break; >>> + case NCS_OS_POSIX_SHM_REQ_STATS: >>> + if (!req->info.stats.o_addr) { >>> + printf("Output space is not defined\n"); >>> + return NCSCC_RC_FAILURE; >>> + } >>> + >>> + if (req->info.stats.ensures_space) { >>> + return NCSCC_RC_SUCCESS; >>> + } else { >>> + if(fstat(req->info.stats.i_fd, >> req->info.stats.o_addr)) { >>> + return NCSCC_RC_FAILURE; >>> + } >>> + } >>> + break; >>> + >>> default: >>> printf("Option Not supported %d \n", req->type); >>> return NCSCC_RC_FAILURE; >>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >>> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c >>> @@ -1851,6 +1851,22 @@ uint32_t cpnd_sec_hdr_update(CPND_CKPT_S >>> CPSV_SECT_HDR sec_hdr; >>> uint32_t rc = NCSCC_RC_SUCCESS; >>> NCS_OS_POSIX_SHM_REQ_INFO write_req; >>> + struct stat shm_stat; >>> + memset(&write_req, '\0', sizeof(write_req)); >>> + memset(&shm_stat, '\0', sizeof(shm_stat)); >>> + write_req.type = NCS_OS_POSIX_SHM_REQ_STATS; >>> + write_req.info.stats.i_fd = >> cp_node->replica_info.open.info.open.o_fd; >>> + write_req.info.stats.ensures_space = false; >>> + write_req.info.stats.o_addr = &shm_stat; >>> + shm_stat.st_size = sizeof(CPSV_CKPT_HDR) + (sec_info->lcl_sec_id + >> 1) * (sizeof(CPSV_SECT_HDR) + cp_node->create_attrib.maxSectionSize); >>> + rc = ncs_os_posix_shm(&write_req); >>> + if (rc != NCSCC_RC_SUCCESS) { >>> + return rc; >>> + } >>> + >>> + if (shm_stat.st_size < sizeof(CPSV_CKPT_HDR) + >>> (sec_info->lcl_sec_id >> + 1) * (sizeof(CPSV_SECT_HDR) + >> cp_node->create_attrib.maxSectionSize)) { >>> + return NCSCC_RC_OUT_OF_MEM; >>> + } >>> memset(&write_req, '\0', sizeof(write_req)); >>> memset(&sec_hdr, '\0', sizeof(CPSV_SECT_HDR)); >>> sec_hdr.lcl_sec_id = sec_info->lcl_sec_id; >> > ------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel