Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, ACK from me. The functional change looks small and the code reorganization looks large; to make review easier, from next time onwards please split the functional change and the code reorganization into different patches (1 of 2 & 2 of 2). Not tested: in-service upgrade & long DN cases. -AVM On 10/27/2016 12:48 PM, Vo Minh Hoang wrote: > Dear Mahesh, > > I tested with cases: > - Old active with new standby > - Old standby with new active > > Each case, create checkpoint, create section, write and read section, close > and unlink. > > Sincerely, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Thursday, October 27, 2016 1:58 PM > To: Hoang Vo ; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] V2 > > Hi Hoang, > > Have you tested in-service upgrade case ? > > -AVM > > > On 10/26/2016 2:33 PM, Hoang Vo wrote: >>osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- >>osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 > -- >>2 files changed, 355 insertions(+), 541 deletions(-) >> >> >> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent >> on all nodes CKPT_INFO size inscrease when support longDN lead to total > size increase. >> solution: >> - From start, cpnd use old format shm. >> - Run time cpnd keep using old format shm until first longDN checkpoint is > created. >> After that cpnd create extended format shm for longDN use. >> - Fix init size for shm. 
>> >> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h >> b/osaf/libs/common/cpsv/include/cpsv_shm.h >> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h >> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h >> @@ -27,7 +27,9 @@ >>#define SHM_NEXT -3 >>#define SHM_INIT -1 >> >> -#define CPSV_CPND_SHM_VERSION1 >> +#define CPSV_CPND_SHM_VERSION 1 >> +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2 >> +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 >> >>typedef struct cpsv_ckpt_hdr { >> SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the > checkpoint */ >> @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { >>} CPSV_SECT_HDR; >> >>typedef struct ckpt_info { >> -char ckpt_name[kOsafMaxDnLength]; >> +SaNameT ckpt_name; >> SaCkptCheckpointHandleT ckpt_id; >> uint32_t maxSections; >> SaSizeT maxSecSize; >> @@ -74,23 +76,10 @@ typedef struct ckpt_info { >> int32_t next; >>} CKPT_INFO; >> >> -typedef struct ckpt_info_v0 { >> -SaNameT ckpt_name; >> -SaCkptCheckpointHandleT ckpt_id; >> -uint32_t maxSections; >> -SaSizeT maxSecSize; >> -NODE_ID node_id; >> -int32_t offset; >> -uint32_t client_bitmap; >> -int32_t is_valid; >> -uint32_t bm_offset; >> -bool is_unlink; >> -bool is_close; >> -bool cpnd_rep_create; >> -bool is_first; >> -SaTimeT close_time; >> -int32_t next; >> -} CKPT_INFO_V0; >> +typedef struct ckpt_extend_info { >> +char ckpt_name[kOsafMaxDnLength + 1]; >> +uint32_t is_valid; >> +} CKPT_EXTENDED_INFO; >> >>typedef struct client_info { >> SaCkptHandleT ckpt_app_hdl; >> @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { >> void *base_addr; >> void *cli_addr; >> void *ckpt_addr; >> +void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED > */ >> int32_t n_clients; >> int32_t n_ckpts; >>} GBL_SHM_PTR; >> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c >> @@ -40,8 +40,6 @@ >> >>#define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) 
>> memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO)) >> >> -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) >> memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) >> - >>#define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) >> memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) >> >>#define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) >> memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) >> @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP >>static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, > uint32_t bitmap_offset, >> uint32_t *offset, uint32_t > *prev_offset); >>static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE >> *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static >> uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); >> -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t >> *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t >> cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, >> SaClmNodeIdT nodeid); -static void >> cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO >> *open_req); -static void >> *cpnd_create_shm_c
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Dear Mahesh, I tested with cases: - Old active with new standby - Old standby with new active Each case, create checkpoint, create section, write and read section, close and unlink. Sincerely, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Thursday, October 27, 2016 1:58 PM To: Hoang Vo ; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, Have you tested in-service upgrade case ? -AVM On 10/26/2016 2:33 PM, Hoang Vo wrote: > osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- > osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 -- > 2 files changed, 355 insertions(+), 541 deletions(-) > > > problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent > on all nodes CKPT_INFO size inscrease when support longDN lead to total size increase. > > solution: > - From start, cpnd use old format shm. > - Run time cpnd keep using old format shm until first longDN checkpoint is created. > After that cpnd create extended format shm for longDN use. > - Fix init size for shm. 
> > diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h > b/osaf/libs/common/cpsv/include/cpsv_shm.h > --- a/osaf/libs/common/cpsv/include/cpsv_shm.h > +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h > @@ -27,7 +27,9 @@ > #define SHM_NEXT -3 > #define SHM_INIT -1 > > -#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2 > +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 > > typedef struct cpsv_ckpt_hdr { > SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the checkpoint */ > @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { > } CPSV_SECT_HDR; > > typedef struct ckpt_info { > - char ckpt_name[kOsafMaxDnLength]; > + SaNameT ckpt_name; > SaCkptCheckpointHandleT ckpt_id; > uint32_t maxSections; > SaSizeT maxSecSize; > @@ -74,23 +76,10 @@ typedef struct ckpt_info { > int32_t next; > } CKPT_INFO; > > -typedef struct ckpt_info_v0 { > - SaNameT ckpt_name; > - SaCkptCheckpointHandleT ckpt_id; > - uint32_t maxSections; > - SaSizeT maxSecSize; > - NODE_ID node_id; > - int32_t offset; > - uint32_t client_bitmap; > - int32_t is_valid; > - uint32_t bm_offset; > - bool is_unlink; > - bool is_close; > - bool cpnd_rep_create; > - bool is_first; > - SaTimeT close_time; > - int32_t next; > -} CKPT_INFO_V0; > +typedef struct ckpt_extend_info { > + char ckpt_name[kOsafMaxDnLength + 1]; > + uint32_t is_valid; > +} CKPT_EXTENDED_INFO; > > typedef struct client_info { > SaCkptHandleT ckpt_app_hdl; > @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { > void *base_addr; > void *cli_addr; > void *ckpt_addr; > + void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED */ > int32_t n_clients; > int32_t n_ckpts; > } GBL_SHM_PTR; > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > @@ -40,8 +40,6 @@ > > #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) > 
memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO)) > > -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) > memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) > - > #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) > > #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) > memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) > @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP > static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, uint32_t bitmap_offset, >uint32_t *offset, uint32_t *prev_offset); > static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE > *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static > uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); > -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t > *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t > cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); -static void > cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > *open_req); -static void > *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO *req_info); > -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); -static void > cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO *cp_info); > +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > +*open_req); static uint32_t cpnd_shm_extended_open(CPND_CB *cb, > +uint32_t flag); static uint32_t > +cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); static > +SaConstStringT cpnd
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, Have you tested in-service upgrade case ? -AVM On 10/26/2016 2:33 PM, Hoang Vo wrote: > osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- > osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 > -- > 2 files changed, 355 insertions(+), 541 deletions(-) > > > problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent on all > nodes > CKPT_INFO size inscrease when support longDN lead to total size increase. > > solution: > - From start, cpnd use old format shm. > - Run time cpnd keep using old format shm until first longDN checkpoint is > created. > After that cpnd create extended format shm for longDN use. > - Fix init size for shm. > > diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h > b/osaf/libs/common/cpsv/include/cpsv_shm.h > --- a/osaf/libs/common/cpsv/include/cpsv_shm.h > +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h > @@ -27,7 +27,9 @@ > #define SHM_NEXT -3 > #define SHM_INIT -1 > > -#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2 > +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 > > typedef struct cpsv_ckpt_hdr { > SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the > checkpoint */ > @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { > } CPSV_SECT_HDR; > > typedef struct ckpt_info { > - char ckpt_name[kOsafMaxDnLength]; > + SaNameT ckpt_name; > SaCkptCheckpointHandleT ckpt_id; > uint32_t maxSections; > SaSizeT maxSecSize; > @@ -74,23 +76,10 @@ typedef struct ckpt_info { > int32_t next; > } CKPT_INFO; > > -typedef struct ckpt_info_v0 { > - SaNameT ckpt_name; > - SaCkptCheckpointHandleT ckpt_id; > - uint32_t maxSections; > - SaSizeT maxSecSize; > - NODE_ID node_id; > - int32_t offset; > - uint32_t client_bitmap; > - int32_t is_valid; > - uint32_t bm_offset; > - bool is_unlink; > - bool is_close; > - bool cpnd_rep_create; > - bool is_first; > - SaTimeT close_time; > - int32_t next; > -} CKPT_INFO_V0; > +typedef struct ckpt_extend_info { > + char ckpt_name[kOsafMaxDnLength + 1]; > 
+ uint32_t is_valid; > +} CKPT_EXTENDED_INFO; > > typedef struct client_info { > SaCkptHandleT ckpt_app_hdl; > @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { > void *base_addr; > void *cli_addr; > void *ckpt_addr; > + void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED */ > int32_t n_clients; > int32_t n_ckpts; > } GBL_SHM_PTR; > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > @@ -40,8 +40,6 @@ > > #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) > memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO)) > > -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) > memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) > - > #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) > > #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) > memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) > @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP > static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, > uint32_t bitmap_offset, >uint32_t *offset, uint32_t > *prev_offset); > static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, > uint32_t curr_offset, uint32_t prev_offset); > -static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); > -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); > -static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); > -static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > *open_req); > -static void *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO > *req_info); > -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); > -static void cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO > *cp_info); > +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); > +static 
uint32_t cpnd_shm_extended_open(CPND_CB *cb, uint32_t flag); > +static uint32_t cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); > +static SaConstStringT cpnd_extended_name_borrow(const SaNameT* name); > +static void cpnd_extended_name_free(const SaNameT* name); > > > /*** > * >* Name : cpnd_client_extract_bits > @@ -324,10 +320,24 @@ void cpnd_restart_update_timer(CPND_CB * > > void *cpnd_restart_shm_create(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, > CPND_CB *cb, SaClmNodeIdT nodeid) > { > - uint32_t rc = NCSCC_RC_SUCCESS; > +
[devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- osaf/services/saf/cpsv/cpnd/cpnd_res.c | 868 -- 2 files changed, 355 insertions(+), 541 deletions(-) problem: In the case of CKPT, osafckptnd increased by 3,5Mb (240 percent) on all nodes. The CKPT_INFO size increase needed to support longDN leads to the total size increase. solution: - From start, cpnd uses the old format shm. - At run time, cpnd keeps using the old format shm until the first longDN checkpoint is created. After that, cpnd creates an extended format shm for longDN use. - Fix init size for shm. diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h b/osaf/libs/common/cpsv/include/cpsv_shm.h --- a/osaf/libs/common/cpsv/include/cpsv_shm.h +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h @@ -27,7 +27,9 @@ #define SHM_NEXT -3 #define SHM_INIT -1 -#define CPSV_CPND_SHM_VERSION1 +#define CPSV_CPND_SHM_VERSION 1 +#define CPSV_CPND_SHM_VERSION_DEPRECATE2 +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 typedef struct cpsv_ckpt_hdr { SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the checkpoint */ @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { } CPSV_SECT_HDR; typedef struct ckpt_info { - char ckpt_name[kOsafMaxDnLength]; + SaNameT ckpt_name; SaCkptCheckpointHandleT ckpt_id; uint32_t maxSections; SaSizeT maxSecSize; @@ -74,23 +76,10 @@ typedef struct ckpt_info { int32_t next; } CKPT_INFO; -typedef struct ckpt_info_v0 { - SaNameT ckpt_name; - SaCkptCheckpointHandleT ckpt_id; - uint32_t maxSections; - SaSizeT maxSecSize; - NODE_ID node_id; - int32_t offset; - uint32_t client_bitmap; - int32_t is_valid; - uint32_t bm_offset; - bool is_unlink; - bool is_close; - bool cpnd_rep_create; - bool is_first; - SaTimeT close_time; - int32_t next; -} CKPT_INFO_V0; +typedef struct ckpt_extend_info { + char ckpt_name[kOsafMaxDnLength + 1]; + uint32_t is_valid; +} CKPT_EXTENDED_INFO; typedef struct client_info { SaCkptHandleT ckpt_app_hdl; @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { void *base_addr; void *cli_addr; void *ckpt_addr; + void *extended_addr;/* 
Added in CPSV_CPND_SHM_VERSION_EXTENDED */ int32_t n_clients; int32_t n_ckpts; } GBL_SHM_PTR; diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c b/osaf/services/saf/cpsv/cpnd/cpnd_res.c --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c @@ -40,8 +40,6 @@ #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO)) -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) - #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, uint32_t bitmap_offset, uint32_t *offset, uint32_t *prev_offset); static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, SaClmNodeIdT nodeid); -static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); -static void *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO *req_info); -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); -static void cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO *cp_info); +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); +static uint32_t cpnd_shm_extended_open(CPND_CB *cb, uint32_t flag); +static uint32_t cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); +static SaConstStringT cpnd_extended_name_borrow(const SaNameT* name); +static void cpnd_extended_name_free(const SaNameT* name); /*** * * Name : cpnd_client_extract_bits @@ -324,10 
+320,24 @@ void cpnd_restart_update_timer(CPND_CB * void *cpnd_restart_shm_create(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, CPND_CB *cb, SaClmNodeIdT nodeid) { - uint32_t rc = NCSCC_RC_SUCCESS; + uint32_t counter = 0, count, num_bitset = 0, n_clients, rc = NCSCC_RC_SUCCESS, bit_position; + uint64_t i_offset; + int32_t next_offset; + CPND_CKPT_CLIENT_NODE *cl_node = NULL; + CPND_CKPT_NODE *cp_node = NULL; + CLIENT_INFO cl_info; + CLIENT_HDR cli_hdr; + CKPT_INFO cp_info, tmp_cp_
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, What I tested is only the simple default cpsv `small format shm` (Short-DN) functionality. This `big format shm` (LONG-DN) fix has side effects/implications on existing functionality, and such issues make bugs more complex to handle, so please test all of the following cases with the new patch before publishing it: 1) `small format shm` (Short-DN), 2) `big format shm` (LONG-DN), 3) a combination of both `small format shm` (Short-DN) & `big format shm` (LONG-DN) -AVM On 10/26/2016 10:51 AM, Vo Minh Hoang wrote: > Dear Mahesh, > > Thank you very much for your help. > Compared to your test app I found my test stop too soon. > After reboot I just check shm existence, did not check to open again. > > I will send fix patch soon after carefully test it again. > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Wednesday, October 26, 2016 11:02 AM > To: Vo Minh Hoang ; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] V2 > > Hi Hoang, > > The attached `test_#2108_app.c` application will generate cpnd shm open > request is getting failed case > > #gcc test_#2108_app.c -o checkpoint -lSaCkpt > > -AVM > > On 10/25/2016 12:23 PM, A V Mahesh wrote: >> Hi Hoang, >> >> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >>> Would you please tell me the process to reproduce this error? >> I will write standalone application and will share with you . >> >> -AVM >> >> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> >>> Thank you very much for your checking. >>> It is very strangle that I tested with 2 following case: >>> - restart nd by kill -9 >>> - restart node by kill -9 >>> Both cases executed well in my local machine. >>> >>> Would you please tell me the process to reproduce this error? >>> It is very strangle that ER is cannot open replica's shm that is not in >>> touch of this patch. 
>>> >>> Thank you and best regards, >>> Hoang >>> >>> -Original Message- >>> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >>> Sent: Tuesday, October 25, 2016 12:53 PM >>> To: Hoang Vo ; anders.wid...@ericsson.com >>> Cc: opensaf-devel@lists.sourceforge.net >>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >>> length [#2108] V2 >>> >>> Hi Hoang, >>> >>> With the patch after CPND restart cpnd shm open request is getting >>> failed >>> >>> please test CPND restart cases. >>> >>> > > >>> >>> >>> saCkptCheckpointOpen returned checkpointHandle 626040 >>> 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 >>> Before pkill osafckptnd saCkptCheckpointOpen >>> root 23946 1 0 11:14 ?00:00:00 >>> /usr/lib64/opensaf/osafckptnd >>> root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >>> osafckptnd >>> root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd >>> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown >>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation >>> timer started (timeout: 600 ns) >>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of >>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) >>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >>> 'avaDown' : Recovery is 'componentRestart' >>> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start >>> CPND_RETENTION timer id = 0x663f10, arg=0x664020 >>> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started >>> VV saCkptCheckpointOpen 3rd may hit try again returned 18. >>> 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 >>> VV saCkptCheckpointOpen 4th returned may hit try again >>> returned 12. >>> 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 >>> saCkptCheckpointOpen 5th returned 12. 
>>> saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 >>> Before pkill osafckptnd & saCkptCheckpointClose >>> root 24058 1 0 11:15 ?00:00:00 >>> /usr/lib64/opensaf/osafckptnd >>> root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >>> osafckptnd >>> root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd >>> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown >>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of >>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) >>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO >>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >>> 'avaDown' : Recovery is 'componentRestart' >>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started >>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request f
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Dear Mahesh, Thank you very much for your help. Compared with your test app, I found that my test stopped too soon. After the reboot I only checked that the shm existed; I did not check that it could be opened again. I will send a fix patch soon, after carefully testing it again. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Wednesday, October 26, 2016 11:02 AM To: Vo Minh Hoang ; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, The attached `test_#2108_app.c` application will generate cpnd shm open request is getting failed case #gcc test_#2108_app.c -o checkpoint -lSaCkpt -AVM On 10/25/2016 12:23 PM, A V Mahesh wrote: > Hi Hoang, > > On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >> Would you please tell me the process to reproduce this error? > I will write standalone application and will share with you . > > -AVM > > On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Thank you very much for your checking. >> It is very strangle that I tested with 2 following case: >> - restart nd by kill -9 >> - restart node by kill -9 >> Both cases executed well in my local machine. >> >> Would you please tell me the process to reproduce this error? >> It is very strangle that ER is cannot open replica's shm that is not in >> touch of this patch. >> >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 25, 2016 12:53 PM >> To: Hoang Vo ; anders.wid...@ericsson.com >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] V2 >> >> Hi Hoang, >> >> With the patch after CPND restart cpnd shm open request is getting >> failed >> >> please test CPND restart cases. 
>> >> >> >> >> >> saCkptCheckpointOpen returned checkpointHandle 626040 >> 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 >>Before pkill osafckptnd saCkptCheckpointOpen >> root 23946 1 0 11:14 ?00:00:00 >> /usr/lib64/opensaf/osafckptnd >> root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >> osafckptnd >> root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd >> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown >> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation >> timer started (timeout: 600 ns) >> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of >> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) >> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO >> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >> 'avaDown' : Recovery is 'componentRestart' >> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start >> CPND_RETENTION timer id = 0x663f10, arg=0x664020 >> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started >>VV saCkptCheckpointOpen 3rd may hit try again returned 18. >> 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 >>VV saCkptCheckpointOpen 4th returned may hit try again >> returned 12. >> 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 >>saCkptCheckpointOpen 5th returned 12. 
>> saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 >>Before pkill osafckptnd & saCkptCheckpointClose >> root 24058 1 0 11:15 ?00:00:00 >> /usr/lib64/opensaf/osafckptnd >> root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep >> osafckptnd >> root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd >> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown >> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of >> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) >> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO >> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to >> 'avaDown' : Recovery is 'componentRestart' >> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started >> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed >> safCkpt=checkpoint_tes_131343_1 >> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed >> safCkpt=checkpoint_tes_131343_1 >> saCkptCheckpointClose checkpointHandle 626040 >> Attempt 0-0: saCkptCheckpointClose returned 12. >> 222 saCkptCheckpointClose checkpointHandle 6261f0 >> Attempt 0-0: saCkptCheckpointClose returned 12. >> 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 >> Attempt 0-0: saCkptCheckpointClose returned 9. >> saCkptCheckpointClose checkpointHandle 7fffb4a097d8 >> Attempt 0-0: saCkptCheckpointClose returned 9. >> 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 >> Attempt 0-0
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, The attached `test_#2108_app.c` application reproduces the case in which the cpnd shm open request fails. Build it with: #gcc test_#2108_app.c -o checkpoint -lSaCkpt -AVM On 10/25/2016 12:23 PM, A V Mahesh wrote: Hi Hoang, On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: Would you please tell me the process to reproduce this error? I will write standalone application and will share with you . -AVM On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: Dear Mahesh, Thank you very much for your checking. It is very strangle that I tested with 2 following case: - restart nd by kill -9 - restart node by kill -9 Both cases executed well in my local machine. Would you please tell me the process to reproduce this error? It is very strangle that ER is cannot open replica's shm that is not in touch of this patch. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 25, 2016 12:53 PM To: Hoang Vo ; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, With the patch after CPND restart cpnd shm open request is getting failed please test CPND restart cases. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 Before pkill osafckptnd saCkptCheckpointOpen root 23946 1 0 11:14 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation timer started (timeout: 600 ns) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 saCkptCheckpointOpen 5th returned 12. 
saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 Before pkill osafckptnd & saCkptCheckpointClose root 24058 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 saCkptCheckpointClose checkpointHandle 626040 Attempt 0-0: saCkptCheckpointClose returned 12. 222 saCkptCheckpointClose checkpointHandle 6261f0 Attempt 0-0: saCkptCheckpointClose returned 12. 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 Attempt 0-0: saCkptCheckpointClose returned 9. saCkptCheckpointClose checkpointHandle 7fffb4a097d8 Attempt 0-0: saCkptCheckpointClose returned 9. 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 Attempt 0-0: saCkptCheckpointClose returned 9. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 628b40 Before pkill osafckptnd saCkptCheckpointOpen root 24080 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_pro
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: > Would you please tell me the process to reproduce this error? I will write standalone application and will share with you . -AVM On 10/25/2016 12:10 PM, Vo Minh Hoang wrote: > Dear Mahesh, > > Thank you very much for your checking. > It is very strangle that I tested with 2 following case: > - restart nd by kill -9 > - restart node by kill -9 > Both cases executed well in my local machine. > > Would you please tell me the process to reproduce this error? > It is very strangle that ER is cannot open replica's shm that is not in > touch of this patch. > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 25, 2016 12:53 PM > To: Hoang Vo ; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] V2 > > Hi Hoang, > > With the patch after CPND restart cpnd shm open request is getting failed > > please test CPND restart cases. 
> > > > > saCkptCheckpointOpen returned checkpointHandle 626040 > 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 >Before pkill osafckptnd saCkptCheckpointOpen > root 23946 1 0 11:14 ?00:00:00 > /usr/lib64/opensaf/osafckptnd > root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep > osafckptnd > root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd > Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown > Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation > timer started (timeout: 600 ns) > Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) > Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO > 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to > 'avaDown' : Recovery is 'componentRestart' > Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start > CPND_RETENTION timer id = 0x663f10, arg=0x664020 > Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started >VV saCkptCheckpointOpen 3rd may hit try again returned 18. > 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 >VV saCkptCheckpointOpen 4th returned may hit try again > returned 12. > 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 >saCkptCheckpointOpen 5th returned 12. 
> saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 >Before pkill osafckptnd & saCkptCheckpointClose > root 24058 1 0 11:15 ?00:00:00 > /usr/lib64/opensaf/osafckptnd > root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep > osafckptnd > root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd > Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown > Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) > Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO > 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to > 'avaDown' : Recovery is 'componentRestart' > Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started > Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed > safCkpt=checkpoint_tes_131343_1 > Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed > safCkpt=checkpoint_tes_131343_1 > saCkptCheckpointClose checkpointHandle 626040 > Attempt 0-0: saCkptCheckpointClose returned 12. > 222 saCkptCheckpointClose checkpointHandle 6261f0 > Attempt 0-0: saCkptCheckpointClose returned 12. > 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 > Attempt 0-0: saCkptCheckpointClose returned 9. > saCkptCheckpointClose checkpointHandle 7fffb4a097d8 > Attempt 0-0: saCkptCheckpointClose returned 9. > 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 > Attempt 0-0: saCkptCheckpointClose returned 9. 
> saCkptCheckpointOpen returned checkpointHandle 626040 > 222 saCkptCheckpointOpen returned checkpointHandle 628b40 >Before pkill osafckptnd saCkptCheckpointOpen > root 24080 1 0 11:15 ?00:00:00 > /usr/lib64/opensaf/osafckptnd > root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep > osafckptnd > root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd > Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown > Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of > 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) > Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO > 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to > 'avaDown' : Recovery is 'componentRestart' > Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start > CPND_RETENTI
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Dear Mahesh, Thank you very much for your checking. It is very strange, because I tested the following 2 cases: - restart nd by kill -9 - restart node by kill -9 Both cases executed well on my local machine. Would you please tell me the process to reproduce this error? It is also very strange that the ER is that it cannot open the replica's shm, which is not touched by this patch. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 25, 2016 12:53 PM To: Hoang Vo ; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2 Hi Hoang, With the patch after CPND restart cpnd shm open request is getting failed please test CPND restart cases. saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 Before pkill osafckptnd saCkptCheckpointOpen root 23946 1 0 11:14 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation timer started (timeout: 600 ns) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 
444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 saCkptCheckpointOpen 5th returned 12. saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 Before pkill osafckptnd & saCkptCheckpointClose root 24058 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 saCkptCheckpointClose checkpointHandle 626040 Attempt 0-0: saCkptCheckpointClose returned 12. 222 saCkptCheckpointClose checkpointHandle 6261f0 Attempt 0-0: saCkptCheckpointClose returned 12. 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 Attempt 0-0: saCkptCheckpointClose returned 9. saCkptCheckpointClose checkpointHandle 7fffb4a097d8 Attempt 0-0: saCkptCheckpointClose returned 9. 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 Attempt 0-0: saCkptCheckpointClose returned 9. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 628b40 Before pkill osafckptnd saCkptCheckpointOpen root 24080 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:26 SC-1 osafckptnd[24102]: Started Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHan
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
Hi Hoang, With the patch after CPND restart cpnd shm open request is getting failed please test CPND restart cases. saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 6261f0 Before pkill osafckptnd saCkptCheckpointOpen root 23946 1 0 11:14 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24041 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24043 24041 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation timer started (timeout: 600 ns) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1) Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 7f29fbdc7588 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 444 saCkptCheckpointOpen returned checkpointHandle 7fffb4a097d8 saCkptCheckpointOpen 5th returned 12. 
saCkptCheckpointOpen returned checkpointHandle 7f29fbdf61a8 Before pkill osafckptnd & saCkptCheckpointClose root 24058 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24063 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24065 24063 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2) Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 saCkptCheckpointClose checkpointHandle 626040 Attempt 0-0: saCkptCheckpointClose returned 12. 222 saCkptCheckpointClose checkpointHandle 6261f0 Attempt 0-0: saCkptCheckpointClose returned 12. 333 saCkptCheckpointClose checkpointHandle 7f29fbdc7588 Attempt 0-0: saCkptCheckpointClose returned 9. saCkptCheckpointClose checkpointHandle 7fffb4a097d8 Attempt 0-0: saCkptCheckpointClose returned 9. 555 saCkptCheckpointClose checkpointHandle 7f29fbdf61a8 Attempt 0-0: saCkptCheckpointClose returned 9. 
saCkptCheckpointOpen returned checkpointHandle 626040 222 saCkptCheckpointOpen returned checkpointHandle 628b40 Before pkill osafckptnd saCkptCheckpointOpen root 24080 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24085 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24087 24085 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3) Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 'avaDown' : Recovery is 'componentRestart' Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start CPND_RETENTION timer id = 0x663f10, arg=0x664020 Oct 25 11:15:26 SC-1 osafckptnd[24102]: Started Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed safCkpt=checkpoint_tes_131343_1 VV saCkptCheckpointOpen 3rd may hit try again returned 18. 333 saCkptCheckpointOpen returned checkpointHandle 0 VV saCkptCheckpointOpen 4th returned may hit try again returned 12. 444 saCkptCheckpointOpen returned checkpointHandle 0 saCkptCheckpointOpen 5th returned 12. saCkptCheckpointOpen returned checkpointHandle 0 Before pkill osafckptnd & saCkptCheckpointClose root 24102 1 0 11:15 ?00:00:00 /usr/lib64/opensaf/osafckptnd root 24107 24038 0 11:15 pts/000:00:00 sh -c ps -ef | grep osafckptnd root 24109 24107 0 11:15 pts/000:00:00 grep osafckptnd Oct 25 11:15:38 SC-1 osafckptnd[24102]: exiting for shutdown Oct 25 11:15:38 SC-1 osafamfnd[23844]: NO Restarting a component of 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart c
[devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2
osaf/libs/common/cpsv/include/cpsv_shm.h | 28 +- osaf/services/saf/cpsv/cpnd/cpnd_res.c | 859 +++--- 2 files changed, 336 insertions(+), 551 deletions(-) problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent on all nodes CKPT_INFO size inscrease when support longDN lead to total size increase. solution: - From start, cpnd use old format shm. - Run time cpnd keep using old format shm until first longDN checkpoint is created. After that cpnd create extended format shm for longDN use. - Fix init size for shm. diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h b/osaf/libs/common/cpsv/include/cpsv_shm.h --- a/osaf/libs/common/cpsv/include/cpsv_shm.h +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h @@ -27,7 +27,9 @@ #define SHM_NEXT -3 #define SHM_INIT -1 -#define CPSV_CPND_SHM_VERSION1 +#define CPSV_CPND_SHM_VERSION 1 +#define CPSV_CPND_SHM_VERSION_DEPRECATE2 +#define CPSV_CPND_SHM_VERSION_EXTENDED 3 typedef struct cpsv_ckpt_hdr { SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the checkpoint */ @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr { } CPSV_SECT_HDR; typedef struct ckpt_info { - char ckpt_name[kOsafMaxDnLength]; + SaNameT ckpt_name; SaCkptCheckpointHandleT ckpt_id; uint32_t maxSections; SaSizeT maxSecSize; @@ -74,23 +76,10 @@ typedef struct ckpt_info { int32_t next; } CKPT_INFO; -typedef struct ckpt_info_v0 { - SaNameT ckpt_name; - SaCkptCheckpointHandleT ckpt_id; - uint32_t maxSections; - SaSizeT maxSecSize; - NODE_ID node_id; - int32_t offset; - uint32_t client_bitmap; - int32_t is_valid; - uint32_t bm_offset; - bool is_unlink; - bool is_close; - bool cpnd_rep_create; - bool is_first; - SaTimeT close_time; - int32_t next; -} CKPT_INFO_V0; +typedef struct ckpt_extend_info { + char ckpt_name[kOsafMaxDnLength + 1]; + uint32_t is_valid; +} CKPT_EXTENDED_INFO; typedef struct client_info { SaCkptHandleT ckpt_app_hdl; @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr { void *base_addr; void *cli_addr; void *ckpt_addr; + void *extended_addr;/* 
Added in CPSV_CPND_SHM_VERSION_EXTENDED */ int32_t n_clients; int32_t n_ckpts; } GBL_SHM_PTR; diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c b/osaf/services/saf/cpsv/cpnd/cpnd_res.c --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c @@ -40,8 +40,6 @@ #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO)) -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0)) - #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) @@ -50,13 +48,10 @@ static uint32_t cpnd_res_ckpt_sec_add(CP static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, uint32_t bitmap_offset, uint32_t *offset, uint32_t *prev_offset); static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, SaClmNodeIdT nodeid); -static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); -static void *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO *req_info); -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); -static void cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO *cp_info); +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); +static uint32_t cpnd_shm_extended_open(CPND_CB *cb, uint32_t flag); +static uint32_t cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); +static void cpnd_extended_name_free(const SaNameT* name); /*** * * Name : cpnd_client_extract_bits @@ -324,10 +319,24 @@ void cpnd_restart_update_timer(CPND_CB * void 
*cpnd_restart_shm_create(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, CPND_CB *cb, SaClmNodeIdT nodeid) { - uint32_t rc = NCSCC_RC_SUCCESS; + uint32_t counter = 0, count, num_bitset = 0, n_clients, rc = NCSCC_RC_SUCCESS, bit_position; + uint64_t i_offset; + int32_t next_offset; + CPND_CKPT_CLIENT_NODE *cl_node = NULL; + CPND_CKPT_NODE *cp_node = NULL; + CLIENT_INFO cl_info; + CLIENT_HDR cli_hdr; + CKPT_INFO cp_info, tmp_cp_info; + SaCkptHandleT client_hdl; char *buf = NULL, *buff
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Although it is technically possible to upgrade OpenSAF without rebooting the node, in practice this is neither tested nor recommended anyway. So I don't think you need to support "version 2" in the new code. We do support downgrade of OpenSAF, and I guess it can't be worse than downgrading to a previous version anyhow. You could verify that it doesn't crash if it encounters a shared memory with an unknown version. regards, Anders Widell On 10/18/2016 11:17 AM, Vo Minh Hoang wrote: > Dear Anders and Mahesh, > > Thank you very much for your comments. > > I would like to clarify the backward compatible satisfaction in this case. > We have `small format shm` that is used from beginning (version 1). > We also have `big format shm` that is introduced with Long DN feature > (changeset 7949:815c56c74d18) (version 2). > The question is: > Do we need to make modification that compatible with both 2 kind of shm or > just the original small one and complete remove the big one? > > As you might know, the 2 SHMs approach comes before this SHM swapping ideal > but I postpone implementing it because of the complicated logic for working > with both 2 old versions. > > If we don't need to care about version 2, I might introduce cleaner code > solution. > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 3:51 PM > To: Anders Widell ; Vo Minh Hoang > > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoang, > > Swamping SHM looks more complex logic to me while accessing old & new > shm in transit , > and it will create more issue and will take some time to stabilize. > > Let us explore other options like Anders Widell suggested or any other, > which can be simple and avoids the SHM swap. 
> > I assessed these issues and I was reluctant to have Long DN for Ckpt > service , > where their is no much piratical use case. > > -AVM > > On 10/18/2016 1:57 PM, Anders Widell wrote: >> Maybe it is better to create a second shm containing a fixed-size >> record of the remaining 2048-256 bytes of the DN? Then you will not >> have to convert the shm format and creating a new shm segment should >> be very quick. >> >> regards, >> >> Anders Widell >> >> >> On 10/18/2016 09:29 AM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> >>> Sorry I miss-sending incomplete email. >>> This is full version. >>> -- >>> I would like to send my answer to 2 of your concerning points in >>> compound. >>> >>> Based on my understand, a client command affects shared mem by following >>> behavior: >>> >>> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) >>> > update shm (3) >>> >>> When: >>> --> Synchronous >>> ==> Asynchronous >>> (1) and (2) has same behavior to update shm and store pointer to shm >>> (3) The modification only take place here include swapping shm and >>> update >>> pointers >>> >>> So even there are multiple call from multiple client at a time, CPND >>> update >>> shm in sequence. So just after the first request swaps shm, the second >>> request could access shm. There is not case that 2 requests access >>> shm at >>> the same time. >>> When shm already storing data, in swapping, CPND will update pointer >>> so the >>> next request that accesses old data can still work with updated >>> pointer with >>> same behavior. 
>>> >>> Thank you and best regards, >>> Hoang >>> >>> -Original Message- >>> From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au] >>> Sent: Tuesday, October 18, 2016 2:15 PM >>> To: 'A V Mahesh' >>> Cc: 'anders.wid...@ericsson.com' ; >>> 'opensaf-devel@lists.sourceforge.net' >>> >>> Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >>> length [#2108] >>> >>> Dear Mahesh, >>> >>> I would like to send my answer to 2 of your concerning points in >>> compound. >>> >>> Based on my understand, a client command affects shared mem by following >>> behavior: >>> >>> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) >>> > update shm (3) >>> >>> When: >>> --> >>> >>> >>> >>> -Original Message- >>> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >>> Sent: Tuesday, October 18, 2016 1:10 PM >>> To: Vo Minh Hoang >>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >>> length [#2108] >>> >>> Hi Hoang, >>> >>> >>> On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: Dear Mahesh, >> [AVM] A non-collated Ckpt will have two replicas on both Active and standby. Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles swapping itself and does not affect each other nor another. >>> [AVM] I was taking about existing, swapping of existing `small >>> format shm` >>> not not new create request , where the ckpt is already op
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Dear Anders and Mahesh, Thank you very much for your comments. I would like to clarify the backward compatibility requirement in this case. We have the `small format shm` that has been used from the beginning (version 1). We also have the `big format shm` that was introduced with the Long DN feature (changeset 7949:815c56c74d18) (version 2). The question is: Do we need to make a modification that is compatible with both kinds of shm, or just with the original small one, completely removing the big one? As you might know, the 2 SHMs approach came before this SHM swapping idea, but I postponed implementing it because of the complicated logic for working with both old versions. If we don't need to care about version 2, I might introduce a cleaner code solution. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 18, 2016 3:51 PM To: Anders Widell ; Vo Minh Hoang Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Hi Hoang, Swamping SHM looks more complex logic to me while accessing old & new shm in transit , and it will create more issue and will take some time to stabilize. Let us explore other options like Anders Widell suggested or any other, which can be simple and avoids the SHM swap. I assessed these issues and I was reluctant to have Long DN for Ckpt service , where their is no much piratical use case. -AVM On 10/18/2016 1:57 PM, Anders Widell wrote: > Maybe it is better to create a second shm containing a fixed-size > record of the remaining 2048-256 bytes of the DN? Then you will not > have to convert the shm format and creating a new shm segment should > be very quick. > > regards, > > Anders Widell > > > On 10/18/2016 09:29 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Sorry I miss-sending incomplete email. >> This is full version. >> -- >> I would like to send my answer to 2 of your concerning points in >> compound. 
>> >> Based on my understand, a client command affects shared mem by following >> behavior: >> >> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) >> > update shm (3) >> >> When: >> --> Synchronous >> ==> Asynchronous >> (1) and (2) has same behavior to update shm and store pointer to shm >> (3) The modification only take place here include swapping shm and >> update >> pointers >> >> So even there are multiple call from multiple client at a time, CPND >> update >> shm in sequence. So just after the first request swaps shm, the second >> request could access shm. There is not case that 2 requests access >> shm at >> the same time. >> When shm already storing data, in swapping, CPND will update pointer >> so the >> next request that accesses old data can still work with updated >> pointer with >> same behavior. >> >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au] >> Sent: Tuesday, October 18, 2016 2:15 PM >> To: 'A V Mahesh' >> Cc: 'anders.wid...@ericsson.com' ; >> 'opensaf-devel@lists.sourceforge.net' >> >> Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Dear Mahesh, >> >> I would like to send my answer to 2 of your concerning points in >> compound. 
>> >> Based on my understand, a client command affects shared mem by following >> behavior: >> >> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) >> > update shm (3) >> >> When: >> --> >> >> >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 18, 2016 1:10 PM >> To: Vo Minh Hoang >> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoang, >> >> >> On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> > [AVM] A non-collated Ckpt will have two replicas on both Active and >>> standby. >>> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it >>> handles >>> swapping itself and does not affect each other nor another. >> [AVM] I was taking about existing, swapping of existing `small >> format shm` >>not not new create request , where the ckpt is already opened >> multiple nodes with ALL option. > [AVM] piratically we can have large size data & transit time, if ckt >> pat >>> has large data sham is file I/O operation > not middle-ware controlled activity , swap time will > vary >>> depending on system. >>> I am agree that this modification affects performance of create/open >>> function so it need performance acceptance verification. >>> Fortunately, shared mem is on memory so it is not heavily depend on >>> OS or >>> file system (unless on swap memory area). >>> Maybe I am not understand your ideal here but I have not found a clear >>> reason of handling try-again. >>
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Hi Hoang, Swapping SHM looks like more complex logic to me while accessing old & new shm in transit, and it will create more issues and will take some time to stabilize. Let us explore other options, like the one Anders Widell suggested or any other, which can be simple and avoid the SHM swap. I assessed these issues and I was reluctant to have Long DN for the Ckpt service, where there is not much practical use case. -AVM On 10/18/2016 1:57 PM, Anders Widell wrote: > Maybe it is better to create a second shm containing a fixed-size > record of the remaining 2048-256 bytes of the DN? Then you will not > have to convert the shm format and creating a new shm segment should > be very quick. > > regards, > > Anders Widell > > > On 10/18/2016 09:29 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Sorry I miss-sending incomplete email. >> This is full version. >> -- >> I would like to send my answer to 2 of your concerning points in >> compound. >> >> Based on my understand, a client command affects shared mem by following >> behavior: >> >> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) >> > update shm (3) >> >> When: >> --> Synchronous >> ==> Asynchronous >> (1) and (2) has same behavior to update shm and store pointer to shm >> (3) The modification only take place here include swapping shm and >> update >> pointers >> >> So even there are multiple call from multiple client at a time, CPND >> update >> shm in sequence. So just after the first request swaps shm, the second >> request could access shm. There is not case that 2 requests access >> shm at >> the same time. >> When shm already storing data, in swapping, CPND will update pointer >> so the >> next request that accesses old data can still work with updated >> pointer with >> same behavior. 
>> >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au] >> Sent: Tuesday, October 18, 2016 2:15 PM >> To: 'A V Mahesh' >> Cc: 'anders.wid...@ericsson.com' ; >> 'opensaf-devel@lists.sourceforge.net' >> >> Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Dear Mahesh, >> >> I would like to send my answer to 2 of your concerning points in >> compound. >> >> Based on my understand, a client command affects shared mem by following >> behavior: >> >> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) >> > update shm (3) >> >> When: >> --> >> >> >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 18, 2016 1:10 PM >> To: Vo Minh Hoang >> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoang, >> >> >> On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> > [AVM] A non-collated Ckpt will have two replicas on both Active and >>> standby. >>> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it >>> handles >>> swapping itself and does not affect each other nor another. >> [AVM] I was taking about existing, swapping of existing `small >> format shm` >>not not new create request , where the ckpt is already opened >> multiple nodes with ALL option. > [AVM] piratically we can have large size data & transit time, if ckt >> pat >>> has large data sham is file I/O operation > not middle-ware controlled activity , swap time will > vary >>> depending on system. >>> I am agree that this modification affects performance of create/open >>> function so it need performance acceptance verification. >>> Fortunately, shared mem is on memory so it is not heavily depend on >>> OS or >>> file system (unless on swap memory area). 
>>> Maybe I am not understand your ideal here but I have not found a clear >>> reason of handling try-again. >> [AVM] say for example an application is writing in a loop to old `small >> format shm`, >> at that moment you started conversation of old `small format shm` to >> new `big format` >> >> -AVM >> >>> Thank you and best regards, >>> Hoang >>> >>> -Original Message- >>> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >>> Sent: Tuesday, October 18, 2016 12:14 PM >>> To: Vo Minh Hoang >>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >>> length [#2108] >>> >>> Hi Hoan, >>> >>> >>> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: Dear Mahesh, Thank you very much for your comments. I would like to explain my understanding and reason for this solution. Please correct me if I am wrong. - This memory swapping works on single node alone, it will occur maximum once per node in open/create checkpoint process. - This swapping action just takes place in nodes that meet condition and does not affec
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Maybe it is better to create a second shm containing a fixed-size record of the remaining 2048-256 bytes of the DN? Then you will not have to convert the shm format and creating a new shm segment should be very quick. regards, Anders Widell On 10/18/2016 09:29 AM, Vo Minh Hoang wrote: > Dear Mahesh, > > Sorry I miss-sending incomplete email. > This is full version. > -- > I would like to send my answer to 2 of your concerning points in compound. > > Based on my understand, a client command affects shared mem by following > behavior: > > Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) > > update shm (3) > > When: > --> Synchronous > ==> Asynchronous > (1) and (2) has same behavior to update shm and store pointer to shm > (3) The modification only take place here include swapping shm and update > pointers > > So even there are multiple call from multiple client at a time, CPND update > shm in sequence. So just after the first request swaps shm, the second > request could access shm. There is not case that 2 requests access shm at > the same time. > When shm already storing data, in swapping, CPND will update pointer so the > next request that accesses old data can still work with updated pointer with > same behavior. > > Thank you and best regards, > Hoang > > -Original Message- > From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au] > Sent: Tuesday, October 18, 2016 2:15 PM > To: 'A V Mahesh' > Cc: 'anders.wid...@ericsson.com' ; > 'opensaf-devel@lists.sourceforge.net' > Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Dear Mahesh, > > I would like to send my answer to 2 of your concerning points in compound. 
> > Based on my understand, a client command affects shared mem by following > behavior: > > Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) > > update shm (3) > > When: > --> > > > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 1:10 PM > To: Vo Minh Hoang > Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoang, > > > On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> [AVM] A non-collated Ckpt will have two replicas on both Active and >> standby. >> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles >> swapping itself and does not affect each other nor another. > [AVM] I was taking about existing, swapping of existing `small format shm` >not not new create request , where the ckpt is already opened > multiple nodes with ALL option. [AVM] piratically we can have large size data & transit time, if ckt > pat >> has large data sham is file I/O operation not middle-ware controlled activity , swap time will vary >> depending on system. >> I am agree that this modification affects performance of create/open >> function so it need performance acceptance verification. >> Fortunately, shared mem is on memory so it is not heavily depend on OS or >> file system (unless on swap memory area). >> Maybe I am not understand your ideal here but I have not found a clear >> reason of handling try-again. 
> [AVM] say for example an application is writing in a loop to old `small > format shm`, > at that moment you started conversation of old `small format shm` to > new `big format` > > -AVM > >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 18, 2016 12:14 PM >> To: Vo Minh Hoang >> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoan, >> >> >> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> >>> Thank you very much for your comments. >>> >>> I would like to explain my understanding and reason for this solution. >>> Please correct me if I am wrong. >>> >>> - This memory swapping works on single node alone, it will occur >>> maximum once per node in open/create checkpoint process. >>> - This swapping action just takes place in nodes that meet condition >>> and does not affect other node. >> [AVM] A non-collated Ckpt will have two replicas on both Active and > standby >> . >>> - CPND handles open/create processes atomically in sequence in one >>> thread only. >>> >>> Because of that I think it is unnecessary to implement thread >>> synchronizing or `try-again` handling. >> [AVM] piratically we can have large size data & transit time, if ckt pat >> has large data sham is file I/O operation >>not middle-ware controlled activity , swap time will vary >> depending on system. >>> Sincerely, >>> Hoang >>> >>> -Original Message- >>> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >>> Sent: Tuesd
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Dear Mahesh, Sorry I miss-sending incomplete email. This is full version. -- I would like to send my answer to 2 of your concerning points in compound. Based on my understand, a client command affects shared mem by following behavior: Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) > update shm (3) When: --> Synchronous ==> Asynchronous (1) and (2) has same behavior to update shm and store pointer to shm (3) The modification only take place here include swapping shm and update pointers So even there are multiple call from multiple client at a time, CPND update shm in sequence. So just after the first request swaps shm, the second request could access shm. There is not case that 2 requests access shm at the same time. When shm already storing data, in swapping, CPND will update pointer so the next request that accesses old data can still work with updated pointer with same behavior. Thank you and best regards, Hoang -Original Message- From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au] Sent: Tuesday, October 18, 2016 2:15 PM To: 'A V Mahesh' Cc: 'anders.wid...@ericsson.com' ; 'opensaf-devel@lists.sourceforge.net' Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Dear Mahesh, I would like to send my answer to 2 of your concerning points in compound. Based on my understand, a client command affects shared mem by following behavior: Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) > update shm (3) When: --> -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 18, 2016 1:10 PM To: Vo Minh Hoang Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Hi Hoang, On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: > Dear Mahesh, > >>> [AVM] A non-collated Ckpt will have two replicas on both Active and > standby. 
> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles > swapping itself and does not affect each other nor another. [AVM] I was taking about existing, swapping of existing `small format shm` not not new create request , where the ckpt is already opened multiple nodes with ALL option. > >>> [AVM] piratically we can have large size data & transit time, if ckt pat > has large data sham is file I/O operation >>> not middle-ware controlled activity , swap time will vary > depending on system. > I am agree that this modification affects performance of create/open > function so it need performance acceptance verification. > Fortunately, shared mem is on memory so it is not heavily depend on OS or > file system (unless on swap memory area). > Maybe I am not understand your ideal here but I have not found a clear > reason of handling try-again. [AVM] say for example an application is writing in a loop to old `small format shm`, at that moment you started conversation of old `small format shm` to new `big format` -AVM > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 12:14 PM > To: Vo Minh Hoang > Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoan, > > > On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Thank you very much for your comments. >> >> I would like to explain my understanding and reason for this solution. >> Please correct me if I am wrong. >> >> - This memory swapping works on single node alone, it will occur >> maximum once per node in open/create checkpoint process. >> - This swapping action just takes place in nodes that meet condition >> and does not affect other node. > [AVM] A non-collated Ckpt will have two replicas on both Active and standby > . 
>> - CPND handles open/create processes atomically in sequence in one >> thread only. >> >> Because of that I think it is unnecessary to implement thread >> synchronizing or `try-again` handling. > [AVM] piratically we can have large size data & transit time, if ckt pat > has large data sham is file I/O operation > not middle-ware controlled activity , swap time will vary > depending on system. >> Sincerely, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 18, 2016 10:48 AM >> To: Vo Minh Hoang >> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoang, >> >> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >>> No, old checkpoint data is converted to `big format`. >>> So all of them will be stored in `big format`. >> [AVM] This approach is introducing NEW transit , so far application >> are aware of sw
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Dear Mahesh, I would like to send my answer to 2 of your concerning points in compound. Based on my understand, a client command affects shared mem by following behavior: Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2) > update shm (3) When: --> -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 18, 2016 1:10 PM To: Vo Minh Hoang Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Hi Hoang, On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: > Dear Mahesh, > >>> [AVM] A non-collated Ckpt will have two replicas on both Active and > standby. > Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles > swapping itself and does not affect each other nor another. [AVM] I was taking about existing, swapping of existing `small format shm` not not new create request , where the ckpt is already opened multiple nodes with ALL option. > >>> [AVM] piratically we can have large size data & transit time, if ckt pat > has large data sham is file I/O operation >>> not middle-ware controlled activity , swap time will vary > depending on system. > I am agree that this modification affects performance of create/open > function so it need performance acceptance verification. > Fortunately, shared mem is on memory so it is not heavily depend on OS or > file system (unless on swap memory area). > Maybe I am not understand your ideal here but I have not found a clear > reason of handling try-again. 
[AVM] say for example an application is writing in a loop to old `small format shm`, at that moment you started conversation of old `small format shm` to new `big format` -AVM > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 12:14 PM > To: Vo Minh Hoang > Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoan, > > > On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Thank you very much for your comments. >> >> I would like to explain my understanding and reason for this solution. >> Please correct me if I am wrong. >> >> - This memory swapping works on single node alone, it will occur >> maximum once per node in open/create checkpoint process. >> - This swapping action just takes place in nodes that meet condition >> and does not affect other node. > [AVM] A non-collated Ckpt will have two replicas on both Active and standby > . >> - CPND handles open/create processes atomically in sequence in one >> thread only. >> >> Because of that I think it is unnecessary to implement thread >> synchronizing or `try-again` handling. > [AVM] piratically we can have large size data & transit time, if ckt pat > has large data sham is file I/O operation > not middle-ware controlled activity , swap time will vary > depending on system. >> Sincerely, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 18, 2016 10:48 AM >> To: Vo Minh Hoang >> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoang, >> >> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >>> No, old checkpoint data is converted to `big format`. >>> So all of them will be stored in `big format`. 
>> [AVM] This approach is introducing NEW transit , so far application >> are aware of switch-over & fail-over transit and TRY-AGAIN is >> expected only in those case , now this solution is introducing a new >> transit for the application which are accessioning the old (by the >> way this patch didn't implemented TRY-AGAIN when shared memory >> swapping action occurring) >> >> `small format shm`, up on some application creating `big format` ( >> application impacting the HA behavior ) >> not sure about the solution approach need to discussed ! >> >> -AVM >> >> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> >>> Because of keeping the consistent working behavior of existing >>> function, only 1 shared memory at a time. If shared memory swapping >>> action occurs, a new shared memory will replace old one. >>> >>> Here is the detailed answers to your questions: > -The existing `small format shm` will continue to be small , is > that >>> right ? > -Only newly created longDN checkpoint will be in `big format shm`, > is >>> that right ? >>> No, old checkpoint data is converted to `big format`. >>> So all of them will be stored in `big format`. >>> > - what will be the format of newly joined the PL-5 opens an > existing >>> `small format shm` >>> PL-5 still use `small format`. >>> Only when
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Hi Hoang, On 10/18/2016 11:24 AM, Vo Minh Hoang wrote: > Dear Mahesh, > >>> [AVM] A non-collated Ckpt will have two replicas on both Active and > standby. > Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles > swapping itself and does not affect each other nor another. [AVM] I was talking about the swapping of an existing `small format shm`, not a new create request, where the ckpt is already opened on multiple nodes with the ALL option. > >>> [AVM] piratically we can have large size data & transit time, if ckt pat > has large data sham is file I/O operation >>> not middle-ware controlled activity , swap time will vary > depending on system. > I am agree that this modification affects performance of create/open > function so it need performance acceptance verification. > Fortunately, shared mem is on memory so it is not heavily depend on OS or > file system (unless on swap memory area). > Maybe I am not understand your ideal here but I have not found a clear > reason of handling try-again. [AVM] Say, for example, an application is writing in a loop to the old `small format shm`, and at that moment you start the conversion of the old `small format shm` to the new `big format`. -AVM > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 12:14 PM > To: Vo Minh Hoang > Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoan, > > > On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Thank you very much for your comments. >> >> I would like to explain my understanding and reason for this solution. >> Please correct me if I am wrong. >> >> - This memory swapping works on single node alone, it will occur >> maximum once per node in open/create checkpoint process. 
>> - This swapping action just takes place in nodes that meet condition >> and does not affect other node. > [AVM] A non-collated Ckpt will have two replicas on both Active and standby > . >> - CPND handles open/create processes atomically in sequence in one >> thread only. >> >> Because of that I think it is unnecessary to implement thread >> synchronizing or `try-again` handling. > [AVM] piratically we can have large size data & transit time, if ckt pat > has large data sham is file I/O operation > not middle-ware controlled activity , swap time will vary > depending on system. >> Sincerely, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Tuesday, October 18, 2016 10:48 AM >> To: Vo Minh Hoang >> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoang, >> >> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >>> No, old checkpoint data is converted to `big format`. >>> So all of them will be stored in `big format`. >> [AVM] This approach is introducing NEW transit , so far application >> are aware of switch-over & fail-over transit and TRY-AGAIN is >> expected only in those case , now this solution is introducing a new >> transit for the application which are accessioning the old (by the >> way this patch didn't implemented TRY-AGAIN when shared memory >> swapping action occurring) >> >> `small format shm`, up on some application creating `big format` ( >> application impacting the HA behavior ) >> not sure about the solution approach need to discussed ! >> >> -AVM >> >> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >>> Dear Mahesh, >>> >>> Because of keeping the consistent working behavior of existing >>> function, only 1 shared memory at a time. If shared memory swapping >>> action occurs, a new shared memory will replace old one. 
>>> >>> Here is the detailed answers to your questions: > -The existing `small format shm` will continue to be small , is > that >>> right ? > -Only newly created longDN checkpoint will be in `big format shm`, > is >>> that right ? >>> No, old checkpoint data is converted to `big format`. >>> So all of them will be stored in `big format`. >>> > - what will be the format of newly joined the PL-5 opens an > existing >>> `small format shm` >>> PL-5 still use `small format`. >>> Only when a long DN replica is added in this node, the shared memory >>> is converted to `big format`. > the what will be the new replica on new node `small format shm` > or `big >>> format shm` ? >>> This implementation only affect the `header` shared memory >>> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica >>> shared memory (opensaf_ckptname_nodeid_n). >>> >>> About testing, because of above specification, I tested: >>> - start new node >>> - restart ckptnd with existing small shm >>> - restart ckptnd with existing big shm >>> - create first lon
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Dear Mahesh, >> [AVM] A non-collated Ckpt will have two replicas on both Active and standby. Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message, so each node handles the swapping itself and the nodes do not affect each other. >> [AVM] piratically we can have large size data & transit time, if ckt pat has large data sham is file I/O operation >> not middle-ware controlled activity , swap time will vary depending on system. I agree that this modification affects the performance of the create/open function, so it needs performance acceptance verification. Fortunately, shared mem is in memory, so it does not depend heavily on the OS or file system (unless it is in the swap memory area). Maybe I do not understand your idea here, but I have not found a clear reason for handling try-again. Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 18, 2016 12:14 PM To: Vo Minh Hoang Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Hi Hoan, On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: > Dear Mahesh, > > Thank you very much for your comments. > > I would like to explain my understanding and reason for this solution. > Please correct me if I am wrong. > > - This memory swapping works on single node alone, it will occur > maximum once per node in open/create checkpoint process. > - This swapping action just takes place in nodes that meet condition > and does not affect other node. [AVM] A non-collated Ckpt will have two replicas on both Active and standby . > - CPND handles open/create processes atomically in sequence in one > thread only. > > Because of that I think it is unnecessary to implement thread > synchronizing or `try-again` handling. 
[AVM] piratically we can have large size data & transit time, if ckt pat has large data sham is file I/O operation not middle-ware controlled activity , swap time will vary depending on system. > > Sincerely, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 10:48 AM > To: Vo Minh Hoang > Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoang, > > On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >> No, old checkpoint data is converted to `big format`. >> So all of them will be stored in `big format`. > [AVM] This approach is introducing NEW transit , so far application > are aware of switch-over & fail-over transit and TRY-AGAIN is > expected only in those case , now this solution is introducing a new > transit for the application which are accessioning the old (by the > way this patch didn't implemented TRY-AGAIN when shared memory > swapping action occurring) > > `small format shm`, up on some application creating `big format` ( > application impacting the HA behavior ) >not sure about the solution approach need to discussed ! > > -AVM > > On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Because of keeping the consistent working behavior of existing >> function, only 1 shared memory at a time. If shared memory swapping >> action occurs, a new shared memory will replace old one. >> >> Here is the detailed answers to your questions: -The existing `small format shm` will continue to be small , is that >> right ? -Only newly created longDN checkpoint will be in `big format shm`, is >> that right ? >> No, old checkpoint data is converted to `big format`. >> So all of them will be stored in `big format`. >> - what will be the format of newly joined the PL-5 opens an existing >> `small format shm` >> PL-5 still use `small format`. 
>> Only when a long DN replica is added in this node, the shared memory >> is converted to `big format`. the what will be the new replica on new node `small format shm` or `big >> format shm` ? >> This implementation only affect the `header` shared memory >> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica >> shared memory (opensaf_ckptname_nodeid_n). >> >> About testing, because of above specification, I tested: >> - start new node >> - restart ckptnd with existing small shm >> - restart ckptnd with existing big shm >> - create first long dn (check all node) >> >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Thursday, October 13, 2016 1:33 PM >> To: Hoang Vo ; anders.wid...@ericsson.com >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt >> name length [#2108] >> >> Hi Hoang, >> >>>> - Run time cpnd keep using small format shm until first longDN >> checkpoint is created. >>>> After that cpnd use big format shm. >>
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Hi Hoan, On 10/18/2016 9:59 AM, Vo Minh Hoang wrote: > Dear Mahesh, > > Thank you very much for your comments. > > I would like to explain my understanding and reason for this solution. > Please correct me if I am wrong. > > - This memory swapping works on single node alone, it will occur maximum > once per node in open/create checkpoint process. > - This swapping action just takes place in nodes that meet condition and > does not affect other node. [AVM] A non-collated Ckpt will have two replicas on both Active and standby . > - CPND handles open/create processes atomically in sequence in one thread > only. > > Because of that I think it is unnecessary to implement thread synchronizing > or `try-again` handling. [AVM] piratically we can have large size data & transit time, if ckt pat has large data sham is file I/O operation not middle-ware controlled activity , swap time will vary depending on system. > > Sincerely, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Tuesday, October 18, 2016 10:48 AM > To: Vo Minh Hoang > Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoang, > > On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >> No, old checkpoint data is converted to `big format`. >> So all of them will be stored in `big format`. > [AVM] This approach is introducing NEW transit , so far application are > aware of switch-over & fail-over transit and TRY-AGAIN is expected only in > those case , now this solution is introducing a new transit for the > application which are accessioning the old (by the way this patch didn't > implemented TRY-AGAIN when shared memory swapping action occurring) > > `small format shm`, up on some application creating `big format` ( > application impacting the HA behavior ) >not sure about the solution approach need to discussed ! 
> > -AVM > > On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: >> Dear Mahesh, >> >> Because of keeping the consistent working behavior of existing >> function, only 1 shared memory at a time. If shared memory swapping >> action occurs, a new shared memory will replace old one. >> >> Here is the detailed answers to your questions: -The existing `small format shm` will continue to be small , is that >> right ? -Only newly created longDN checkpoint will be in `big format shm`, is >> that right ? >> No, old checkpoint data is converted to `big format`. >> So all of them will be stored in `big format`. >> - what will be the format of newly joined the PL-5 opens an existing >> `small format shm` >> PL-5 still use `small format`. >> Only when a long DN replica is added in this node, the shared memory >> is converted to `big format`. the what will be the new replica on new node `small format shm` or `big >> format shm` ? >> This implementation only affect the `header` shared memory >> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica shared >> memory (opensaf_ckptname_nodeid_n). >> >> About testing, because of above specification, I tested: >> - start new node >> - restart ckptnd with existing small shm >> - restart ckptnd with existing big shm >> - create first long dn (check all node) >> >> Thank you and best regards, >> Hoang >> >> -Original Message- >> From: A V Mahesh [mailto:mahesh.va...@oracle.com] >> Sent: Thursday, October 13, 2016 1:33 PM >> To: Hoang Vo ; anders.wid...@ericsson.com >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name >> length [#2108] >> >> Hi Hoang, >> >>>> - Run time cpnd keep using small format shm until first longDN >> checkpoint is created. >>>> After that cpnd use big format shm. >> >> While reviewing I am assuming following please confirm : >> >> -The existing `small format shm` will continue to be small , is >> that right ? 
>> -Only newly created longDN checkpoint will be in `big format shm`, is >> that right ? >> - what will be the format of newly joined the PL-5 opens an existing >> `small format shm` >> the what will be the new replica on new node `small format shm` >> or `big format shm` ? >> >> >> I hope you tested following : >> == >> - combination of some `small format shm` and some `big format shm` >> ckpts >> - Joined a New node ( say PL-5) and then opened the existing `small >> format shm` ckpt from the new Node >> - Restating controller which has combination of `small format shm` >> and `big format shm` and how the restored non-collocated ckpt`s >> >> -AVM >> >> On 10/11/2016 1:15 PM, Hoang Vo wrote: >>> osaf/libs/common/cpsv/include/cpsv_shm.h |9 +- >>> osaf/services/saf/cpsv/cpnd/cpnd_res.c | 565 >> -- >>> 2 files changed, 536 insertions(+), 38 deletions(-) >>> >>> >>> problem: In the case of CKPT o
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Dear Mahesh, Thank you very much for your comments. I would like to explain my understanding and reason for this solution. Please correct me if I am wrong. - This memory swapping works on a single node alone; it will occur at most once per node, in the open/create checkpoint process. - This swapping action only takes place in nodes that meet the condition and does not affect other nodes. - CPND handles open/create processes atomically, in sequence, in one thread only. Because of that, I think it is unnecessary to implement thread synchronization or `try-again` handling. Sincerely, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Tuesday, October 18, 2016 10:48 AM To: Vo Minh Hoang Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Hi Hoang, On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: > No, old checkpoint data is converted to `big format`. > So all of them will be stored in `big format`. [AVM] This approach is introducing NEW transit , so far application are aware of switch-over & fail-over transit and TRY-AGAIN is expected only in those case , now this solution is introducing a new transit for the application which are accessioning the old (by the way this patch didn't implemented TRY-AGAIN when shared memory swapping action occurring) `small format shm`, up on some application creating `big format` ( application impacting the HA behavior ) not sure about the solution approach need to discussed ! -AVM On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: > Dear Mahesh, > > Because of keeping the consistent working behavior of existing > function, only 1 shared memory at a time. If shared memory swapping > action occurs, a new shared memory will replace old one. > > Here is the detailed answers to your questions: >>> -The existing `small format shm` will continue to be small , is >>> that > right ? 
>>> -Only newly created longDN checkpoint will be in `big format shm`, >>> is > that right ? > No, old checkpoint data is converted to `big format`. > So all of them will be stored in `big format`. > >>> - what will be the format of newly joined the PL-5 opens an >>> existing > `small format shm` > PL-5 still use `small format`. > Only when a long DN replica is added in this node, the shared memory > is converted to `big format`. >>> the what will be the new replica on new node `small format shm` >>> or `big > format shm` ? > This implementation only affect the `header` shared memory > (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica shared > memory (opensaf_ckptname_nodeid_n). > > About testing, because of above specification, I tested: > - start new node > - restart ckptnd with existing small shm > - restart ckptnd with existing big shm > - create first long dn (check all node) > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Thursday, October 13, 2016 1:33 PM > To: Hoang Vo ; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoang, > > >> - Run time cpnd keep using small format shm until first longDN > checkpoint is created. > >> After that cpnd use big format shm. > > While reviewing I am assuming following please confirm : > > -The existing `small format shm` will continue to be small , is > that right ? > -Only newly created longDN checkpoint will be in `big format shm`, is > that right ? > - what will be the format of newly joined the PL-5 opens an existing > `small format shm` > the what will be the new replica on new node `small format shm` > or `big format shm` ? 
> > > I hope you tested following : > == > - combination of some `small format shm` and some `big format shm` > ckpts > - Joined a New node ( say PL-5) and then opened the existing `small > format shm` ckpt from the new Node > - Restating controller which has combination of `small format shm` > and `big format shm` and how the restored non-collocated ckpt`s > > -AVM > > On 10/11/2016 1:15 PM, Hoang Vo wrote: >>osaf/libs/common/cpsv/include/cpsv_shm.h |9 +- >>osaf/services/saf/cpsv/cpnd/cpnd_res.c | 565 > -- >>2 files changed, 536 insertions(+), 38 deletions(-) >> >> >> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent >> on all nodes CKPT_INFO size inscrease when support longDN lead to >> total > size increase. >> solution: >> - From start, cpnd use small format shm. >> - Run time cpnd keep using small format shm until first longDN >> checkpoint > is created. >> After that cpnd use big format shm. >> >> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h >> b/osaf/libs/common/cpsv/include/cpsv_shm.h >> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h >> +++ b/osa
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Hi Hoang, On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: > No, old checkpoint data is converted to `big format`. > So all of them will be stored in `big format`. [AVM] This approach introduces a NEW transition. So far, applications are aware only of the switch-over and fail-over transitions, and TRY-AGAIN is expected only in those cases. This solution introduces a new transition for applications that are accessing the old `small format shm` when some other application creates a `big format` checkpoint (one application impacting the HA behavior of others). By the way, this patch did not implement TRY-AGAIN for the period when the shared memory swapping action is occurring. I am not sure about this solution approach; it needs to be discussed! -AVM On 10/13/2016 12:44 PM, Vo Minh Hoang wrote: > Dear Mahesh, > > Because of keeping the consistent working behavior of existing function, > only 1 shared memory at a time. If shared memory swapping action occurs, a > new shared memory will replace old one. > > Here is the detailed answers to your questions: >>> -The existing `small format shm` will continue to be small , is that > right ? >>> -Only newly created longDN checkpoint will be in `big format shm`, is > that right ? > No, old checkpoint data is converted to `big format`. > So all of them will be stored in `big format`. > >>> - what will be the format of newly joined the PL-5 opens an existing > `small format shm` > PL-5 still use `small format`. > Only when a long DN replica is added in this node, the shared memory is > converted to `big format`. >>> the what will be the new replica on new node `small format shm` or `big > format shm` ? > This implementation only affect the `header` shared memory > (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica shared > memory (opensaf_ckptname_nodeid_n). 
> > About testing, because of above specification, I tested: > - start new node > - restart ckptnd with existing small shm > - restart ckptnd with existing big shm > - create first long dn (check all node) > > Thank you and best regards, > Hoang > > -Original Message- > From: A V Mahesh [mailto:mahesh.va...@oracle.com] > Sent: Thursday, October 13, 2016 1:33 PM > To: Hoang Vo ; anders.wid...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name > length [#2108] > > Hi Hoang, > > >> - Run time cpnd keep using small format shm until first longDN > checkpoint is created. > >> After that cpnd use big format shm. > > While reviewing I am assuming following please confirm : > > -The existing `small format shm` will continue to be small , is that > right ? > -Only newly created longDN checkpoint will be in `big format shm`, is that > right ? > - what will be the format of newly joined the PL-5 opens an existing `small > format shm` > the what will be the new replica on new node `small format shm` or `big > format shm` ? > > > I hope you tested following : > == > - combination of some `small format shm` and some `big format shm` ckpts > - Joined a New node ( say PL-5) and then opened the existing `small format > shm` ckpt from the new Node > - Restating controller which has combination of `small format shm` and `big > format shm` and how the restored non-collocated ckpt`s > > -AVM > > On 10/11/2016 1:15 PM, Hoang Vo wrote: >>osaf/libs/common/cpsv/include/cpsv_shm.h |9 +- >>osaf/services/saf/cpsv/cpnd/cpnd_res.c | 565 > -- >>2 files changed, 536 insertions(+), 38 deletions(-) >> >> >> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent >> on all nodes CKPT_INFO size inscrease when support longDN lead to total > size increase. >> solution: >> - From start, cpnd use small format shm. >> - Run time cpnd keep using small format shm until first longDN checkpoint > is created. 
>> After that cpnd use big format shm. >> >> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h >> b/osaf/libs/common/cpsv/include/cpsv_shm.h >> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h >> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h >> @@ -27,7 +27,8 @@ >>#define SHM_NEXT -3 >>#define SHM_INIT -1 >> >> -#define CPSV_CPND_SHM_VERSION1 >> +#define CPSV_CPND_SHM_VERSION_SHORT_DN 0 >> +#define CPSV_CPND_SHM_VERSION_LONG_DN 1 >> >>typedef struct cpsv_ckpt_hdr { >> SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the > checkpoint */ >> @@ -134,4 +135,10 @@ typedef enum cpnd_type_info { >> CPND_CKPT_INFO >>} CPND_TYPE_INFO; >> >> +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ? \ >> +sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + > \ >> +sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) : > \ >> +sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + > \ >> +sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0)) > \ >> + >>#endif >> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_r
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Dear Mahesh, To keep the working behavior of the existing functions consistent, only one shared memory is used at a time. If a shared memory swapping action occurs, a new shared memory will replace the old one. Here are the detailed answers to your questions: >> -The existing `small format shm` will continue to be small , is that right ? >> -Only newly created longDN checkpoint will be in `big format shm`, is that right ? No, old checkpoint data is converted to `big format`. So all of them will be stored in `big format`. >> - what will be the format of newly joined the PL-5 opens an existing `small format shm` PL-5 still uses `small format`. Only when a long DN replica is added on this node is the shared memory converted to `big format`. >> the what will be the new replica on new node `small format shm` or `big format shm` ? This implementation only affects the `header` shared memory (opensaf_CPND_CHECKPOINT_INFO_nodeid). It does not change the replica shared memory (opensaf_ckptname_nodeid_n). About testing, because of the above specification, I tested: - start new node - restart ckptnd with existing small shm - restart ckptnd with existing big shm - create first long dn (check all nodes) Thank you and best regards, Hoang -Original Message- From: A V Mahesh [mailto:mahesh.va...@oracle.com] Sent: Thursday, October 13, 2016 1:33 PM To: Hoang Vo ; anders.wid...@ericsson.com Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] Hi Hoang, >> - Run time cpnd keep using small format shm until first longDN checkpoint is created. >> After that cpnd use big format shm. While reviewing I am assuming following please confirm : -The existing `small format shm` will continue to be small , is that right ? -Only newly created longDN checkpoint will be in `big format shm`, is that right ? 
- what will be the format of newly joined the PL-5 opens an existing `small format shm` the what will be the new replica on new node `small format shm` or `big format shm` ? I hope you tested following : == - combination of some `small format shm` and some `big format shm` ckpts - Joined a New node ( say PL-5) and then opened the existing `small format shm` ckpt from the new Node - Restating controller which has combination of `small format shm` and `big format shm` and how the restored non-collocated ckpt`s -AVM On 10/11/2016 1:15 PM, Hoang Vo wrote: > osaf/libs/common/cpsv/include/cpsv_shm.h |9 +- > osaf/services/saf/cpsv/cpnd/cpnd_res.c | 565 -- > 2 files changed, 536 insertions(+), 38 deletions(-) > > > problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent > on all nodes CKPT_INFO size inscrease when support longDN lead to total size increase. > > solution: > - From start, cpnd use small format shm. > - Run time cpnd keep using small format shm until first longDN checkpoint is created. > After that cpnd use big format shm. > > diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h > b/osaf/libs/common/cpsv/include/cpsv_shm.h > --- a/osaf/libs/common/cpsv/include/cpsv_shm.h > +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h > @@ -27,7 +27,8 @@ > #define SHM_NEXT -3 > #define SHM_INIT -1 > > -#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION_SHORT_DN 0 > +#define CPSV_CPND_SHM_VERSION_LONG_DN1 > > typedef struct cpsv_ckpt_hdr { > SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the checkpoint */ > @@ -134,4 +135,10 @@ typedef enum cpnd_type_info { > CPND_CKPT_INFO > } CPND_TYPE_INFO; > > +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ? 
\ > + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + \ > + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) : \ > + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + \ > + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0)) \ > + > #endif > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > @@ -44,20 +44,34 @@ > > #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) > > +#define m_CPND_CKPTINFO_V0_UPDATE(addr,ckpt_info,offset) > +memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO_V0)) > + > #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) > memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) > > +void *cpnd_restart_shm(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, > +CPND_CB *cb, SaClmNodeIdT nodeid); uint32_t > +cpnd_update_ckpt_with_clienthdl_v1(CPND_CB *cb, CPND_CKPT_NODE > +*cp_node, SaCkptHandleT client_hdl); uint32_t > +cpnd_update_ckpt_with_clienthdl_v0(CPND_CB *cb, CPND_CKPT_NODE > +*cp_node, SaCkptHandleT client_hdl); uint32_t > +cpnd_write_ckpt_info_v1(CPND_CB *cb, CPND_CKPT_NODE *cp_node, i
Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
Hi Hoang, >> - Run time cpnd keep using small format shm until first longDN checkpoint is created. >> After that cpnd use big format shm. While reviewing I am assuming the following; please confirm: - The existing `small format shm` will continue to be small, is that right? - Only a newly created longDN checkpoint will be in `big format shm`, is that right? - What will be the format when the newly joined PL-5 opens an existing `small format shm` ckpt? Then what will the new replica on the new node be, `small format shm` or `big format shm`? I hope you tested the following: == - a combination of some `small format shm` and some `big format shm` ckpts - Joined a new node (say PL-5) and then opened the existing `small format shm` ckpt from the new node - Restarting a controller which has a combination of `small format shm` and `big format shm`, and how the non-collocated ckpts are restored -AVM On 10/11/2016 1:15 PM, Hoang Vo wrote: > osaf/libs/common/cpsv/include/cpsv_shm.h |9 +- > osaf/services/saf/cpsv/cpnd/cpnd_res.c | 565 > -- > 2 files changed, 536 insertions(+), 38 deletions(-) > > > problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent on all > nodes > CKPT_INFO size inscrease when support longDN lead to total size increase. > > solution: > - From start, cpnd use small format shm. > - Run time cpnd keep using small format shm until first longDN checkpoint is > created. > After that cpnd use big format shm. 
> > diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h > b/osaf/libs/common/cpsv/include/cpsv_shm.h > --- a/osaf/libs/common/cpsv/include/cpsv_shm.h > +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h > @@ -27,7 +27,8 @@ > #define SHM_NEXT -3 > #define SHM_INIT -1 > > -#define CPSV_CPND_SHM_VERSION1 > +#define CPSV_CPND_SHM_VERSION_SHORT_DN 0 > +#define CPSV_CPND_SHM_VERSION_LONG_DN1 > > typedef struct cpsv_ckpt_hdr { > SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the > checkpoint */ > @@ -134,4 +135,10 @@ typedef enum cpnd_type_info { > CPND_CKPT_INFO > } CPND_TYPE_INFO; > > +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ? \ > + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + > \ > + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) : > \ > + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + > \ > + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0)) > \ > + > #endif > diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c > +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c > @@ -44,20 +44,34 @@ > > #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) > > +#define m_CPND_CKPTINFO_V0_UPDATE(addr,ckpt_info,offset) > memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO_V0)) > + > #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) > memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) > > +void *cpnd_restart_shm(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, CPND_CB > *cb, SaClmNodeIdT nodeid); > +uint32_t cpnd_update_ckpt_with_clienthdl_v1(CPND_CB *cb, CPND_CKPT_NODE > *cp_node, SaCkptHandleT client_hdl); > +uint32_t cpnd_update_ckpt_with_clienthdl_v0(CPND_CB *cb, CPND_CKPT_NODE > *cp_node, SaCkptHandleT client_hdl); > +uint32_t cpnd_write_ckpt_info_v1(CPND_CB *cb, CPND_CKPT_NODE *cp_node, > int32_t offset, SaCkptHandleT client_hdl); > +uint32_t cpnd_write_ckpt_info_v0(CPND_CB *cb, CPND_CKPT_NODE *cp_node, > 
int32_t offset, SaCkptHandleT client_hdl); > + > static uint32_t cpnd_res_ckpt_sec_add(CPND_CKPT_SECTION_INFO *pSecPtr, > CPND_CKPT_NODE *cp_node); > static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, > uint32_t bitmap_offset, >uint32_t *offset, uint32_t > *prev_offset); > +static bool cpnd_find_exact_ckptinfo_v0(CPND_CB *cb, CKPT_INFO_V0 > *ckpt_info, uint32_t bitmap_offset, > + uint32_t *offset, uint32_t > *prev_offset); > static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, > uint32_t curr_offset, uint32_t prev_offset); > static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); > static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); > static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, > SaClmNodeIdT nodeid); > +static void cpnd_restart_client_reset_v1(CPND_CB *cb, CPND_CKPT_NODE > *cp_node, CPND_CKPT_CLIENT_NODE *cl_node); > +static void cpnd_restart_client_reset_v0(CPND_CB *cb, CPND_CKPT_NODE > *cp_node, CPND_CKPT_CLIENT_NODE *cl_node); > static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO > *open_req); > static void *cpnd_create_shm_cpnd_cp
[devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]
osaf/libs/common/cpsv/include/cpsv_shm.h |9 +- osaf/services/saf/cpsv/cpnd/cpnd_res.c | 565 -- 2 files changed, 536 insertions(+), 38 deletions(-) problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent on all nodes CKPT_INFO size inscrease when support longDN lead to total size increase. solution: - From start, cpnd use small format shm. - Run time cpnd keep using small format shm until first longDN checkpoint is created. After that cpnd use big format shm. diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h b/osaf/libs/common/cpsv/include/cpsv_shm.h --- a/osaf/libs/common/cpsv/include/cpsv_shm.h +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h @@ -27,7 +27,8 @@ #define SHM_NEXT -3 #define SHM_INIT -1 -#define CPSV_CPND_SHM_VERSION1 +#define CPSV_CPND_SHM_VERSION_SHORT_DN 0 +#define CPSV_CPND_SHM_VERSION_LONG_DN 1 typedef struct cpsv_ckpt_hdr { SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the checkpoint */ @@ -134,4 +135,10 @@ typedef enum cpnd_type_info { CPND_CKPT_INFO } CPND_TYPE_INFO; +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ? 
\ + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + \ + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) : \ + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) + \ + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0)) \ + #endif diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c b/osaf/services/saf/cpsv/cpnd/cpnd_res.c --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c @@ -44,20 +44,34 @@ #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO)) +#define m_CPND_CKPTINFO_V0_UPDATE(addr,ckpt_info,offset) memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO_V0)) + #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset) memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR)) +void *cpnd_restart_shm(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, CPND_CB *cb, SaClmNodeIdT nodeid); +uint32_t cpnd_update_ckpt_with_clienthdl_v1(CPND_CB *cb, CPND_CKPT_NODE *cp_node, SaCkptHandleT client_hdl); +uint32_t cpnd_update_ckpt_with_clienthdl_v0(CPND_CB *cb, CPND_CKPT_NODE *cp_node, SaCkptHandleT client_hdl); +uint32_t cpnd_write_ckpt_info_v1(CPND_CB *cb, CPND_CKPT_NODE *cp_node, int32_t offset, SaCkptHandleT client_hdl); +uint32_t cpnd_write_ckpt_info_v0(CPND_CB *cb, CPND_CKPT_NODE *cp_node, int32_t offset, SaCkptHandleT client_hdl); + static uint32_t cpnd_res_ckpt_sec_add(CPND_CKPT_SECTION_INFO *pSecPtr, CPND_CKPT_NODE *cp_node); static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, uint32_t bitmap_offset, uint32_t *offset, uint32_t *prev_offset); +static bool cpnd_find_exact_ckptinfo_v0(CPND_CB *cb, CKPT_INFO_V0 *ckpt_info, uint32_t bitmap_offset, +uint32_t *offset, uint32_t *prev_offset); static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, uint32_t curr_offset, uint32_t prev_offset); static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, SaClmNodeIdT nodeid); static uint32_t 
cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, SaClmNodeIdT nodeid); +static void cpnd_restart_client_reset_v1(CPND_CB *cb, CPND_CKPT_NODE *cp_node, CPND_CKPT_CLIENT_NODE *cl_node); +static void cpnd_restart_client_reset_v0(CPND_CB *cb, CPND_CKPT_NODE *cp_node, CPND_CKPT_CLIENT_NODE *cl_node); static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req); static void *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO *req_info); static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); static void cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO *cp_info); +static CPND_SHM_VERSION cpnd_shm_version; + /*** * * Name : cpnd_client_extract_bits * @@ -296,12 +310,21 @@ void cpnd_restart_update_timer(CPND_CB * CKPT_INFO ckpt_info; memset(&ckpt_info, '\0', sizeof(CKPT_INFO)); - if (cp_node->offset >= 0) { + if (cp_node->offset < 0) { + return; + } + if (cpnd_shm_version.shm_version == CPSV_CPND_SHM_VERSION_LONG_DN) { m_CPND_CKPTINFO_READ(ckpt_info, (char *)cb->shm_addr.ckpt_addr + sizeof(CKPT_HDR), cp_node->offset * sizeof(CKPT_INFO)); ckpt_info.close_time = closetime; m_CPND_CKPTINFO_UPDATE((char *)cb->shm_addr.ckpt_addr + sizeof(CKPT_HDR), ckpt_info, cp_node->offset * sizeof(CKPT_INFO)); + } else { + m_CPND_CKPTINFO_V0_READ(ckpt_info, (char