Hi Hoang,

Ok let us only focus on core dump ,  so solution should be,  adding 
section to checkpoint will still fail  and let us change LOG to
possible error case of SHM deficiency and return error to CPA .

Cpnd will NOT check the true size of shared memory for each write call 
before writing, this will degrade the performance of CPSV.

Can you please re-send the patch V2 with fulfilling above , and change 
the  fix description as well.

-AVM

On 11/25/2016 4:21 PM, Anders Widell wrote:
> Yes, the problem with setting OSAF_CKPT_SHM_ALLOC_GUARANTEE is that 
> the memory consumption will increase. Therefore it is not backwards 
> compatible and thus not possible to do as a bug-fix.
>
> regards,
>
> Anders Widell
>
>
> On 11/25/2016 10:46 AM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> I think this problem is #1712, problem occur when
>> OSAF_CKPT_SHM_ALLOC_GUARANTEE is not set.
>>
>> My thinking is that because we provide 2 mode (guarantee or not) so we
>> should making sure no coredump happened.
>> Btw, because this is out of my scope of decide, I would like to ask 
>> Anders
>> Widell about it.
>>
>> -----Original Message-----
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Friday, November 25, 2016 4:06 PM
>> To: Hoang Vo <hoang.m...@dektech.com.au>; anders.wid...@ericsson.com
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: ensure shared memory size before 
>> writing
>> [#2202]
>>
>> Hi Hoang,
>>
>> Is this issue coming with  enabling #1712  feature  ?
>>
>> Exporting  `OSAF_CKPT_SHM_ALLOC_GUARANTEE=1`  will  provide 
>> guaranteed CPSV
>> service SHM issue and this is addressed broadly all SHM memory issue.
>>
>> So I don't think we required  API level  changes. ( may we need to 
>> document
>> the usage of  Exporting  `OSAF_CKPT_SHM_ALLOC_GUARANTEE=1` )
>>
>> #1712  description
>> ============================================================================ 
>>
>> ==
>> summary:     leap: provide ensured disk space option for shm_open
>> request [1712]
>>
>> description:
>> leap: provide ensured disk space option for shm_open request [1712] 
>> Provided
>> ensured disk space is allocated for NCS_OS_POSIX_SHM_REQ_OPEN request 
>> using
>> posix_fallocate() so that application such as CPSV subsequent writes to
>> bytes in the specified range are guaranteed not to fail because of 
>> lack of
>> disk space.
>>
>> Updated the Opensaf services according to new options based on 
>> requirements.
>>
>> Cpsv service uses the ensured disk space option based on
>> OSAF_CKPT_SHM_ALLOC_GUARANTEE environment variable ,so if user 
>> exports as
>> OSAF_CKPT_SHM_ALLOC_GUARANTEE=1 (true) cpsv provided ensured disk space.
>> ============================================================================ 
>>
>> ==
>>
>> -AVM
>>
>>
>> On 11/23/2016 3:28 PM, Hoang Vo wrote:
>>> osaf/libs/core/include/ncs_osprm.h      |   9 +++++++++
>>>    osaf/libs/core/leap/os_defs.c           |  19 +++++++++++++++++--
>>>    osaf/services/saf/cpsv/cpnd/cpnd_proc.c |  16 ++++++++++++++++
>>>    3 files changed, 42 insertions(+), 2 deletions(-)
>>>
>>>
>>> problem: when checkpoint service init without shared memory size
>>> guaranteed works in high memory load, core dump occur while adding 
>>> section
>> to checkpoint.
>>> solution:  check the true size of shared memory before writing to it.
>>>
>>> diff --git a/osaf/libs/core/include/ncs_osprm.h
>>> b/osaf/libs/core/include/ncs_osprm.h
>>> --- a/osaf/libs/core/include/ncs_osprm.h
>>> +++ b/osaf/libs/core/include/ncs_osprm.h
>>> @@ -557,6 +557,7 @@ typedef enum {
>>>      NCS_OS_POSIX_SHM_REQ_UNLINK,    /* unlink is shm_unlink */
>>>      NCS_OS_POSIX_SHM_REQ_READ,
>>>      NCS_OS_POSIX_SHM_REQ_WRITE,
>>> +  NCS_OS_POSIX_SHM_REQ_STATS,
>>>      NCS_OS_POSIX_SHM_REQ_MAX
>>>    } NCS_OS_POSIX_SHM_REQ_TYPE;
>>>    typedef struct ncs_os_posix_shm_req_open_info_tag { @@ -598,6
>>> +599,13 @@ typedef struct ncs_os_posix_shm_req_writ
>>>      uint64_t i_offset;
>>>    } NCS_OS_POSIX_SHM_REQ_WRITE_INFO;
>>>    +typedef struct ncs_os_posix_shm_req_stats_info_tag {
>>> +  uint32_t i_hdl;
>>> +  int32_t i_fd;
>>> +  bool ensures_space;
>>> +  void *o_addr;
>>> +} NCS_OS_POSIX_SHM_REQ_STATS_INFO;
>>> +
>>>    typedef struct ncs_shm_req_info {
>>>      NCS_OS_POSIX_SHM_REQ_TYPE type;
>>>    @@ -607,6 +615,7 @@ typedef struct ncs_shm_req_info {
>>>        NCS_OS_POSIX_SHM_REQ_UNLINK_INFO unlink;
>>>        NCS_OS_POSIX_SHM_REQ_READ_INFO read;
>>>        NCS_OS_POSIX_SHM_REQ_WRITE_INFO write;
>>> +    NCS_OS_POSIX_SHM_REQ_STATS_INFO stats;
>>>      } info;
>>>       } NCS_OS_POSIX_SHM_REQ_INFO;
>>> diff --git a/osaf/libs/core/leap/os_defs.c
>>> b/osaf/libs/core/leap/os_defs.c
>>> --- a/osaf/libs/core/leap/os_defs.c
>>> +++ b/osaf/libs/core/leap/os_defs.c
>>> @@ -799,9 +799,9 @@ uint32_t ncs_os_posix_shm(NCS_OS_POSIX_S
>>>                    }
>>>                } else {
>>>                    if (ftruncate(req->info.open.o_fd, (off_t)
>> shm_size /* off_t == long */ ) < 0) {
>>> -                    printf("ftruncate failed with errno
>> value %d \n", errno);
>>> +                    LOG_WA("ftruncate failed with errno
>> value %d \n", errno);
>>>                        return NCSCC_RC_FAILURE;
>>> -                                }
>>> +                }
>>>                }
>>>                   uint32_t prot_flag =
>> ncs_shm_prot_flags(req->info.open.i_flags);
>>> @@ -859,6 +859,21 @@ uint32_t ncs_os_posix_shm(NCS_OS_POSIX_S
>>>                   req->info.write.i_write_size);
>>>            break;
>>>    +    case NCS_OS_POSIX_SHM_REQ_STATS:
>>> +        if (!req->info.stats.o_addr) {
>>> +            printf("Output space is not defined\n");
>>> +            return NCSCC_RC_FAILURE;
>>> +        }
>>> +
>>> +        if (req->info.stats.ensures_space) {
>>> +            return NCSCC_RC_SUCCESS;
>>> +        } else {
>>> +            if(fstat(req->info.stats.i_fd,
>> req->info.stats.o_addr)) {
>>> +                return NCSCC_RC_FAILURE;
>>> +            }
>>> +        }
>>> +        break;
>>> +
>>>        default:
>>>            printf("Option Not supported %d \n", req->type);
>>>            return NCSCC_RC_FAILURE;
>>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
>>> @@ -1851,6 +1851,22 @@ uint32_t cpnd_sec_hdr_update(CPND_CKPT_S
>>>        CPSV_SECT_HDR sec_hdr;
>>>        uint32_t rc = NCSCC_RC_SUCCESS;
>>>        NCS_OS_POSIX_SHM_REQ_INFO write_req;
>>> +    struct stat shm_stat;
>>> +    memset(&write_req, '\0', sizeof(write_req));
>>> +    memset(&shm_stat, '\0', sizeof(shm_stat));
>>> +    write_req.type = NCS_OS_POSIX_SHM_REQ_STATS;
>>> +    write_req.info.stats.i_fd =
>> cp_node->replica_info.open.info.open.o_fd;
>>> +    write_req.info.stats.ensures_space = false;
>>> +    write_req.info.stats.o_addr = &shm_stat;
>>> +    shm_stat.st_size = sizeof(CPSV_CKPT_HDR) + (sec_info->lcl_sec_id +
>> 1) * (sizeof(CPSV_SECT_HDR) + cp_node->create_attrib.maxSectionSize);
>>> +    rc = ncs_os_posix_shm(&write_req);
>>> +    if (rc != NCSCC_RC_SUCCESS) {
>>> +        return rc;
>>> +    }
>>> +
>>> +    if (shm_stat.st_size < sizeof(CPSV_CKPT_HDR) + 
>>> (sec_info->lcl_sec_id
>> + 1) * (sizeof(CPSV_SECT_HDR) + 
>> cp_node->create_attrib.maxSectionSize)) {
>>> +        return NCSCC_RC_OUT_OF_MEM;
>>> +    }
>>>        memset(&write_req, '\0', sizeof(write_req));
>>>        memset(&sec_hdr, '\0', sizeof(CPSV_SECT_HDR));
>>>        sec_hdr.lcl_sec_id = sec_info->lcl_sec_id;
>>
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to