Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, ACK from me. The functional change looks small and the code reorganization looks large; to make review easier, from next time onwards please split the functional change and the code reorganization into different patches (1 of 2 & 2 of 2). Not tested: in-service upgrade & long DN cases. -AVM On 10/27/2016 12:48 PM, Vo Minh Hoang wrote: > Dear Mahesh, > > I tested with cases: > - Old active with new standby > - Old standby with new active > > Each case, create checkpoint, create section, write and read section, close > and unlink. > > Sincerely, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Thursday, October 27, 2016 1:58 PM > To: Hoang Vo; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] V2 > > Hi Hoang, > > Have you tested in-service upgrade case ? > > -AVM > > > On 10/26/2016 2:33 PM, Hoang Vo wrote: >>osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- >>osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 > -- >>2 files changed, 355 insertions(+), 541 deletions(-) >> >> >> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent >> on all nodes CKPT_INFO size inscrease when support longDN lead to total > size increase. >> solution: >> - From start, cpnd use old format shm. >> - Run time cpnd keep using old format shm until first longDN checkpoint is > created. >> After that cpnd create extended format shm for longDN use. >> - Fix init size for shm. 
>> >> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h >> b/osaf/libs/common/cpsv/include/cpsv_shm.h >> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h >> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h >> @@ -27,7 +27,9 @@ >>#define SHM_NEXT -3 >>#define SHM_INIT -1 >> >> -#define CPSV_CPND_SHM_VERSION1 >> +#define CPSV_CPND_SHM_VERSION 1 >> +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2 >> +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 >> >>typedef struct cpsv_ckpt_hdr { >> SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the > checkpoint */ >> @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { >>} CPSV_SECT_HDR; >> >>typedef struct ckpt_info { >> -char ckpt_name[kOsafMaxDnLength]; >> +SaNameT ckpt_name; >> SaCkptCheckpointHandleT ckpt_id; >> uint32_t maxSections; >> SaSizeT maxSecSize; >> @@ -74,23 +76,10 @@ typedef struct ckpt_info { >> int32_t next; >>} CKPT_INFO; >> >> -typedef struct ckpt_info_v0 { >> -SaNameT ckpt_name; >> -SaCkptCheckpointHandleT ckpt_id; >> -uint32_t maxSections; >> -SaSizeT maxSecSize; >> -NODE_ID node_id; >> -int32_t offset; >> -uint32_t client_bitmap; >> -int32_t is_valid; >> -uint32_t bm_offset; >> -bool is_unlink; >> -bool is_close; >> -bool cpnd_rep_create; >> -bool is_first; >> -SaTimeT close_time; >> -int32_t next; >> -} CKPT_INFO_V0; >> +typedef struct ckpt_extend_info { >> +char ckpt_name[kOsafMaxDnLength + 1]; >> +uint32_t is_valid; >> +} CKPT_EXTENDED_INFO; >> >>typedef struct client_info { >> SaCkptHandleT ckpt_app_hdl; >> @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { >> void *base_addr; >> void *cli_addr; >> void *ckpt_addr; >> +void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED > */ >> int32_t n_clients; >> int32_t n_ckpts; >>} GBL_SHM_PTR; >> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> @@ -40,8 +40,6 @@ >> >>#define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) 
>> memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO)) >> >> -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) >> memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) >> - >>#define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) >> memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) >> >>#define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) >> memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) >> @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP >>static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, > uint32_t bitmap_offset, >> uint32_t *offset, uint32_t > *prev_offset); >>static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE >> *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static >> uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); >> -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t >> *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t >> cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, >> SaClmNodeIdT nodeid); -static void >> cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO >> *open_req); -static void >>
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Dear Mahesh, I tested with cases: - Old active with new standby - Old standby with new active Each case, create checkpoint, create section, write and read section, close and unlink. Sincerely, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Thursday, October 27, 2016 1:58 PM To: Hoang Vo; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, Have you tested in-service upgrade case ? -AVM On 10/26/2016 2:33 PM, Hoang Vo wrote: > osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- > osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 -- > 2 files changed, 355 insertions(+), 541 deletions(-) > > > problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent > on all nodes CKPT_INFO size inscrease when support longDN lead to total size increase. > > solution: > - From start, cpnd use old format shm. > - Run time cpnd keep using old format shm until first longDN checkpoint is created. > After that cpnd create extended format shm for longDN use. > - Fix init size for shm. 
> > diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h > b/osaf/libs/common/cpsv/include/cpsv_shm.h > --- a/osaf/libs/common/cpsv/include/cpsv_shm.h > +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h > @@ -27,7 +27,9 @@ > #define SHM_NEXT -3 > #define SHM_INIT -1 > > -#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2 > +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 > > typedef struct cpsv_ckpt_hdr { > SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the checkpoint */ > @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { > } CPSV_SECT_HDR; > > typedef struct ckpt_info { > - char ckpt_name[kOsafMaxDnLength]; > + SaNameT ckpt_name; > SaCkptCheckpointHandleT ckpt_id; > uint32_t maxSections; > SaSizeT maxSecSize; > @@ -74,23 +76,10 @@ typedef struct ckpt_info { > int32_t next; > } CKPT_INFO; > > -typedef struct ckpt_info_v0 { > - SaNameT ckpt_name; > - SaCkptCheckpointHandleT ckpt_id; > - uint32_t maxSections; > - SaSizeT maxSecSize; > - NODE_ID node_id; > - int32_t offset; > - uint32_t client_bitmap; > - int32_t is_valid; > - uint32_t bm_offset; > - bool is_unlink; > - bool is_close; > - bool cpnd_rep_create; > - bool is_first; > - SaTimeT close_time; > - int32_t next; > -} CKPT_INFO_V0; > +typedef struct ckpt_extend_info { > + char ckpt_name[kOsafMaxDnLength + 1]; > + uint32_t is_valid; > +} CKPT_EXTENDED_INFO; > > typedef struct client_info { > SaCkptHandleT ckpt_app_hdl; > @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { > void *base_addr; > void *cli_addr; > void *ckpt_addr; > + void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED */ > int32_t n_clients; > int32_t n_ckpts; > } GBL_SHM_PTR; > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > @@ -40,8 +40,6 @@ > > #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) > memcpy(_info,addr+offset,sizeof(CKPT_INFO)) 
> > -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) > memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) > - > #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) > > #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) > memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) > @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP > static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, uint32_t bitmap_offset, >uint32_t *offset, uint32_t *prev_offset); > static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE > *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static > uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); > -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t > *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t > cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); -static void > cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > *open_req); -static void > *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO *req_info); > -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); -static void > cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO *cp_info); > +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > +*open_req); static uint32_t cpnd_shm_extended_open(CPND_CB *cb, > +uint32_t flag); static uint32_t > +cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); static >
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, Have you tested in-service upgrade case ? -AVM On 10/26/2016 2:33 PM, Hoang Vo wrote: > osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- > osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 > -- > 2 files changed, 355 insertions(+), 541 deletions(-) > > > problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent on all > nodes > CKPT_INFO size inscrease when support longDN lead to total size increase. > > solution: > - From start, cpnd use old format shm. > - Run time cpnd keep using old format shm until first longDN checkpoint is > created. > After that cpnd create extended format shm for longDN use. > - Fix init size for shm. > > diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h > b/osaf/libs/common/cpsv/include/cpsv_shm.h > --- a/osaf/libs/common/cpsv/include/cpsv_shm.h > +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h > @@ -27,7 +27,9 @@ > #define SHM_NEXT -3 > #define SHM_INIT -1 > > -#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2 > +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 > > typedef struct cpsv_ckpt_hdr { > SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the > checkpoint */ > @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { > } CPSV_SECT_HDR; > > typedef struct ckpt_info { > - char ckpt_name[kOsafMaxDnLength]; > + SaNameT ckpt_name; > SaCkptCheckpointHandleT ckpt_id; > uint32_t maxSections; > SaSizeT maxSecSize; > @@ -74,23 +76,10 @@ typedef struct ckpt_info { > int32_t next; > } CKPT_INFO; > > -typedef struct ckpt_info_v0 { > - SaNameT ckpt_name; > - SaCkptCheckpointHandleT ckpt_id; > - uint32_t maxSections; > - SaSizeT maxSecSize; > - NODE_ID node_id; > - int32_t offset; > - uint32_t client_bitmap; > - int32_t is_valid; > - uint32_t bm_offset; > - bool is_unlink; > - bool is_close; > - bool cpnd_rep_create; > - bool is_first; > - SaTimeT close_time; > - int32_t next; > -} CKPT_INFO_V0; > +typedef struct ckpt_extend_info { > + char ckpt_name[kOsafMaxDnLength + 1]; > 
+ uint32_t is_valid; > +} CKPT_EXTENDED_INFO; > > typedef struct client_info { > SaCkptHandleT ckpt_app_hdl; > @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { > void *base_addr; > void *cli_addr; > void *ckpt_addr; > + void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED */ > int32_t n_clients; > int32_t n_ckpts; > } GBL_SHM_PTR; > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > @@ -40,8 +40,6 @@ > > #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) > memcpy(_info,addr+offset,sizeof(CKPT_INFO)) > > -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) > memcpy(_info,addr+offset,sizeof(CKPT_INFO_V0)) > - > #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,_info,sizeof(CKPT_INFO)) > > #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) > memcpy(offset,_hdr,sizeof(CKPT_HDR)) > @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP > static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, > uint32_t bitmap_offset, >uint32_t *offset, uint32_t > *prev_offset); > static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, > uint32_t curr_offset, uint32_t prev_offset); > -static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); > -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); > -static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); > -static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > *open_req); > -static void *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO > *req_info); > -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); > -static void cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO > *cp_info); > +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); > +static uint32_t 
cpnd_shm_extended_open(CPND_CB *cb, uint32_t flag); > +static uint32_t cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); > +static SaConstStringT cpnd_extended_name_borrow(const SaNameT* name); > +static void cpnd_extended_name_free(const SaNameT* name); > > > /*** > * >* Name : cpnd_client_extract_bits > @@ -324,10 +320,24 @@ void cpnd_restart_update_timer(CPND_CB * > > void *cpnd_restart_shm_create(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, > CPND_CB *cb, SaClmNodeIdT nodeid) > { > - uint32_t rc = NCSCC_RC_SUCCESS; > + uint32_t
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, What I tested was only the simple default cpsv `small format shm` (Short-DN) functionality. This `big format shm` (LONG-DN) fix has side effects/implications for existing functionality, and such issues will make bugs more complex to handle, so please test all of the following cases with the new patch before publishing: 1) `small format shm` (Short-DN), 2) `big format shm` (LONG-DN), 3) a combination of both `small format shm` (Short-DN) & `big format shm` (LONG-DN). -AVM On 10/26/2016 10:51 AM, Vo Minh Hoang wrote: > Dear Mahesh, > > Thank you very much for your help. > Compared to your test app I found my test stop too soon. > After reboot I just check shm existence, did not check to open again. > > I will send fix patch soon after carefully test it again. > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Wednesday, October 26, 2016 11:02 AM > To: Vo Minh Hoang; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] V2 > > Hi Hoang, > > The attached `test_#2108_app.c` application will generate cpnd shm open > request is getting failed case > > #gcc test_#2108_app.c -o checkpoint -lSaCkpt > > -AVM > > On 10/25/2016 12:23 PM, A V Mahesh wrote: >> Hi Hoang, >> >> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >>> Would you please tell me the process to reproduce this error? >> I will write standalone application and will share with you . >> >> -AVM >> >> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> >>> Thank you very much for your checking. >>> It is very strangle that I tested with 2 following case: >>> - restart nd by kill -9 >>> - restart node by kill -9 >>> Both cases executed well in my local machine. >>> >>> Would you please tell me the process to reproduce this error? >>> It is very strangle that ER is cannot open replica's shm that is not in >>> touch of this patch. 
>>> >>> Thank you and best regards, >>> Hoang >>> >>> -Original Message- >>> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >>> Sent: Tuesday, October 25, 2016 12:53 PM >>> To: Hoang Vo ; anders.wid...@ericsson.com >>> Cc: opensaf-devel@lists.sourceforge.net >>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >>> length [#2108] V2 >>> >>> Hi Hoang, >>> >>> With the patch after CPND restart cpnd shm open request is getting >>> failed >>> >>> please test CPND restart cases. >>> >>> > > >>> >>> >>> saCkptCheckpointOpen returned checkpointHandle 626040 >>> 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 >>> Before pkill osafckptnd saCkptCheckpointOpen >>> root 23946 1 0 11:14 ?00:00:00 >>> /usr/lib64/opensaf/osafckptnd >>> root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >>> osafckptnd >>> root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd >>> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown >>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation >>> timer started (timeout: 600 ns) >>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of >>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) >>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >>> 'avaDown' : Recovery is 'componentRestart' >>> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start >>> CPND_RETENTION timer id = 0x663f10, arg=0x664020 >>> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started >>> VV saCkptCheckpointOpen 3rd may hit try again returned 18. >>> 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 >>> VV saCkptCheckpointOpen 4th returned may hit try again >>> returned 12. >>> 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 >>> saCkptCheckpointOpen 5th returned 12. 
>>> saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 >>> Before pkill osafckptnd & saCkptCheckpointClose >>> root 24058 1 0 11:15 ?00:00:00 >>> /usr/lib64/opensaf/osafckptnd >>> root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >>> osafckptnd >>> root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd >>> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown >>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of >>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) >>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO >>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >>> 'avaDown' : Recovery is 'componentRestart' >>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started >>> Oct 25
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Dear Mahesh, Thank you very much for your help. Compared to your test app I found my test stop too soon. After reboot I just check shm existence, did not check to open again. I will send fix patch soon after carefully test it again. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Wednesday, October 26, 2016 11:02 AM To: Vo Minh Hoang; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, The attached `test_#2108_app.c` application will generate cpnd shm open request is getting failed case #gcc test_#2108_app.c -o checkpoint -lSaCkpt -AVM On 10/25/2016 12:23 PM, A V Mahesh wrote: > Hi Hoang, > > On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >> Would you please tell me the process to reproduce this error? > I will write standalone application and will share with you . > > -AVM > > On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Thank you very much for your checking. >> It is very strangle that I tested with 2 following case: >> - restart nd by kill -9 >> - restart node by kill -9 >> Both cases executed well in my local machine. >> >> Would you please tell me the process to reproduce this error? >> It is very strangle that ER is cannot open replica's shm that is not in >> touch of this patch. >> >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 25, 2016 12:53 PM >> To: Hoang Vo ; anders.wid...@ericsson.com >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] V2 >> >> Hi Hoang, >> >> With the patch after CPND restart cpnd shm open request is getting >> failed >> >> please test CPND restart cases. 
>> >> >> >> >> >> saCkptCheckpointOpen returned checkpointHandle 626040 >> 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 >>Before pkill osafckptnd saCkptCheckpointOpen >> root 23946 1 0 11:14 ?00:00:00 >> /usr/lib64/opensaf/osafckptnd >> root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >> osafckptnd >> root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd >> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown >> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation >> timer started (timeout: 600 ns) >> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of >> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) >> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >> 'avaDown' : Recovery is 'componentRestart' >> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start >> CPND_RETENTION timer id = 0x663f10, arg=0x664020 >> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started >>VV saCkptCheckpointOpen 3rd may hit try again returned 18. >> 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 >>VV saCkptCheckpointOpen 4th returned may hit try again >> returned 12. >> 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 >>saCkptCheckpointOpen 5th returned 12. 
>> saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 >>Before pkill osafckptnd & saCkptCheckpointClose >> root 24058 1 0 11:15 ?00:00:00 >> /usr/lib64/opensaf/osafckptnd >> root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >> osafckptnd >> root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd >> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown >> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of >> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) >> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO >> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >> 'avaDown' : Recovery is 'componentRestart' >> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started >> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed >> safCkpt=checkpoint_tes_131343_1 >> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed >> safCkpt=checkpoint_tes_131343_1 >> saCkptCheckpointClose checkpointHandle 626040 >> Attempt 0-0: saCkptCheckpointClose returned 12. >> 222 saCkptCheckpointClose checkpointHandle 6261f0 >> Attempt 0-0: saCkptCheckpointClose returned 12. >> 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 >> Attempt 0-0: saCkptCheckpointClose returned 9. >> saCkptCheckpointClose checkpointHandle 7fffb4a097d8 >> Attempt 0-0: saCkptCheckpointClose returned 9. >> 555
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, The attached `test_#2108_app.c` application will generate cpnd shm open request is getting failed case #gcc test_#2108_app.c -o checkpoint -lSaCkpt -AVM On 10/25/2016 12:23 PM, A V Mahesh wrote: Hi Hoang, On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: Would you please tell me the process to reproduce this error? I will write standalone application and will share with you . -AVM On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: Dear Mahesh, Thank you very much for your checking. It is very strangle that I tested with 2 following case: - restart nd by kill -9 - restart node by kill -9 Both cases executed well in my local machine. Would you please tell me the process to reproduce this error? It is very strangle that ER is cannot open replica's shm that is not in touch of this patch. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 25, 2016 12:53 PM To: Hoang Vo; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, With the patch after CPND restart cpnd shm open request is getting failed please test CPND restart cases. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 Before pkill osafckptnd saCkptCheckpointOpen root 23946 1 0 11:14 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation timer started (timeout: 600 ns) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 saCkptCheckpointOpen 5th returned 12. 
saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 Before pkill osafckptnd & saCkptCheckpointClose root 24058 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 saCkptCheckpointClose checkpointHandle 626040 Attempt 0-0: saCkptCheckpointClose returned 12. 222 saCkptCheckpointClose checkpointHandle 6261f0 Attempt 0-0: saCkptCheckpointClose returned 12. 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 Attempt 0-0: saCkptCheckpointClose returned 9. saCkptCheckpointClose checkpointHandle 7fffb4a097d8 Attempt 0-0: saCkptCheckpointClose returned 9. 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 Attempt 0-0: saCkptCheckpointClose returned 9. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 628b40 Before pkill osafckptnd saCkptCheckpointOpen root 24080 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:26 SC-1
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: > Would you please tell me the process to reproduce this error? I will write standalone application and will share with you . -AVM On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: > Dear Mahesh, > > Thank you very much for your checking. > It is very strangle that I tested with 2 following case: > - restart nd by kill -9 > - restart node by kill -9 > Both cases executed well in my local machine. > > Would you please tell me the process to reproduce this error? > It is very strangle that ER is cannot open replica's shm that is not in > touch of this patch. > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 25, 2016 12:53 PM > To: Hoang Vo; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] V2 > > Hi Hoang, > > With the patch after CPND restart cpnd shm open request is getting failed > > please test CPND restart cases. 
> > > > > saCkptCheckpointOpen returned checkpointHandle 626040 > 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 >Before pkill osafckptnd saCkptCheckpointOpen > root 23946 1 0 11:14 ?00:00:00 > /usr/lib64/opensaf/osafckptnd > root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep > osafckptnd > root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd > Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown > Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation > timer started (timeout: 600 ns) > Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) > Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO > 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to > 'avaDown' : Recovery is 'componentRestart' > Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start > CPND_RETENTION timer id = 0x663f10, arg=0x664020 > Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started >VV saCkptCheckpointOpen 3rd may hit try again returned 18. > 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 >VV saCkptCheckpointOpen 4th returned may hit try again > returned 12. > 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 >saCkptCheckpointOpen 5th returned 12. 
> saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 >Before pkill osafckptnd & saCkptCheckpointClose > root 24058 1 0 11:15 ?00:00:00 > /usr/lib64/opensaf/osafckptnd > root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep > osafckptnd > root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd > Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown > Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) > Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO > 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to > 'avaDown' : Recovery is 'componentRestart' > Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started > Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed > safCkpt=checkpoint_tes_131343_1 > Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed > safCkpt=checkpoint_tes_131343_1 > saCkptCheckpointClose checkpointHandle 626040 > Attempt 0-0: saCkptCheckpointClose returned 12. > 222 saCkptCheckpointClose checkpointHandle 6261f0 > Attempt 0-0: saCkptCheckpointClose returned 12. > 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 > Attempt 0-0: saCkptCheckpointClose returned 9. > saCkptCheckpointClose checkpointHandle 7fffb4a097d8 > Attempt 0-0: saCkptCheckpointClose returned 9. > 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 > Attempt 0-0: saCkptCheckpointClose returned 9. 
> saCkptCheckpointOpen returned checkpointHandle 626040 > 222 saCkptCheckpointOpen returned checkpointHandle 628b40 >Before pkill osafckptnd saCkptCheckpointOpen > root 24080 1 0 11:15 ?00:00:00 > /usr/lib64/opensaf/osafckptnd > root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep > osafckptnd > root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd > Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown > Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) > Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO > 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to > 'avaDown' : Recovery is 'componentRestart' > Oct 25 11:15:26 SC-1 osafckptd[23989]: NO
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Dear Mahesh, Thank you very much for checking. It is very strange: I tested the following 2 cases: - restart nd by kill -9 - restart node by kill -9 Both cases executed well on my local machine. Would you please tell me the steps to reproduce this error? It is also strange that the ER is a failure to open the replica's shm, which is not touched by this patch. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 25, 2016 12:53 PM To: Hoang Vo; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, With the patch after CPND restart cpnd shm open request is getting failed please test CPND restart cases. saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 Before pkill osafckptnd saCkptCheckpointOpen root 23946 1 0 11:14 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation timer started (timeout: 600 ns) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 
444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 saCkptCheckpointOpen 5th returned 12. saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 Before pkill osafckptnd & saCkptCheckpointClose root 24058 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 saCkptCheckpointClose checkpointHandle 626040 Attempt 0-0: saCkptCheckpointClose returned 12. 222 saCkptCheckpointClose checkpointHandle 6261f0 Attempt 0-0: saCkptCheckpointClose returned 12. 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 Attempt 0-0: saCkptCheckpointClose returned 9. saCkptCheckpointClose checkpointHandle 7fffb4a097d8 Attempt 0-0: saCkptCheckpointClose returned 9. 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 Attempt 0-0: saCkptCheckpointClose returned 9. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 628b40 Before pkill osafckptnd saCkptCheckpointOpen root 24080 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:26 SC-1 osafckptnd[24102]: Started Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, With the patch, after a CPND restart the cpnd shm open request fails; please test the CPND restart cases. saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 Before pkill osafckptnd saCkptCheckpointOpen root 23946 1 0 11:14 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation timer started (timeout: 600 ns) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 saCkptCheckpointOpen 5th returned 12. 
saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 Before pkill osafckptnd & saCkptCheckpointClose root 24058 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 saCkptCheckpointClose checkpointHandle 626040 Attempt 0-0: saCkptCheckpointClose returned 12. 222 saCkptCheckpointClose checkpointHandle 6261f0 Attempt 0-0: saCkptCheckpointClose returned 12. 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 Attempt 0-0: saCkptCheckpointClose returned 9. saCkptCheckpointClose checkpointHandle 7fffb4a097d8 Attempt 0-0: saCkptCheckpointClose returned 9. 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 Attempt 0-0: saCkptCheckpointClose returned 9. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 628b40 Before pkill osafckptnd saCkptCheckpointOpen root 24080 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:26 SC-1 osafckptnd[24102]: Started Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 0 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 444 saCkptCheckpointOpen returned checkpointHandle 0 saCkptCheckpointOpen 5th returned 12. saCkptCheckpointOpen returned checkpointHandle 0 Before pkill osafckptnd & saCkptCheckpointClose root 24102 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24107 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24109 24107 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:38 SC-1 osafckptnd[24102]: exiting for shutdown Oct 25 11:15:38 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart