Hi!

I don't think the sender would need to unconditionally abort() down in 
the MDS layer when it gets back an undelivered message from TIPC. We 
have the message loss callback in MDS, which can be used by the receiver 
to detect lost messages. The receiver can take an appropriate action 
when it receives this callback. If the appropriate action is to restart 
the sender, then the receiver can inform the sender about the message 
loss so that the sender can restart itself.

regards,

Anders Widell

On 08/23/2016 07:21 AM, A V Mahesh wrote:
> Hi HansN,
>
> Let us fist discuss the error handling and abort, then we can come 
> back to
> interpretation of  TIPC currently  does permit  OR does not permit an 
> application to send
> a multicast message with the "destination droppable" setting disabled.
>
> Let us disable TIPC_DEST_DROPPABLE, so that  TIPC will try to return 
> an undelivered multicast message to its sender
> and we can  determine issue is  because of TIPC_ERR_OVERLOAD, this 
> helps in debugging ,
> so that application may increased SO_SNDBUF/SO_RCVBUF to reduce the 
> problem.
>
> But still we need to abort(), the reason for that is current MDS 
> implementations doesn't
> have flow control logic ( no retry because of error ) , so Application 
> like AMF can go wrong and cluster will go into unstable/recoverble state.
>
> This was the reason we haven't gone for it while addressing Ticket 
> #1227 (https://sourceforge.net/p/opensaf/mailman/message/33207717/)
> So currently we don't have any advantage of disabling 
> TIPC_DEST_DROPPABLE and not allowing multicast  messages.
>
> -AVM
>
>
> On 8/18/2016 2:43 PM, Hans Nordeback wrote:
>>   osaf/libs/core/mds/mds_dt_tipc.c |  32 
>> +++++++++++++++++++++++++-------
>>   1 files changed, 25 insertions(+), 7 deletions(-)
>>
>>
>> diff --git a/osaf/libs/core/mds/mds_dt_tipc.c 
>> b/osaf/libs/core/mds/mds_dt_tipc.c
>> --- a/osaf/libs/core/mds/mds_dt_tipc.c
>> +++ b/osaf/libs/core/mds/mds_dt_tipc.c
>> @@ -320,6 +320,15 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid,
>>                   m_MDS_LOG_INFO("MDTM: Successfully set default 
>> socket option TIPC_IMP = %d", TIPCIMPORTANCE);
>>           }
>>   +        int droppable = 0;
>> +        if (setsockopt(tipc_cb.BSRsock, SOL_TIPC, 
>> TIPC_DEST_DROPPABLE, &droppable, sizeof(droppable)) != 0) {
>> +                LOG_ER("MDTM: Can't set TIPC_DEST_DROPPABLE to zero 
>> err :%s\n", strerror(errno));
>> +                m_MDS_LOG_ERR("MDTM: Can't set TIPC_DEST_DROPPABLE 
>> to zero err :%s\n", strerror(errno));
>> +                osafassert(0);
>> +        } else {
>> +                m_MDS_LOG_NOTIFY("MDTM: Successfully set 
>> TIPC_DEST_DROPPABLE to zero");
>> +        }
>> +
>>       return NCSCC_RC_SUCCESS;
>>   }
>>   @@ -563,6 +572,8 @@ ssize_t recvfrom_connectionless (int sd,
>>       unsigned char *cptr;
>>       int i;
>>       int has_addr;
>> +    int anc_data[2];
>> +
>>       ssize_t sz;
>>         has_addr = (from != NULL) && (addrlen != NULL);
>> @@ -591,19 +602,26 @@ ssize_t recvfrom_connectionless (int sd,
>>                  if the message was sent using a TIPC name or name 
>> sequence as the
>>                  destination rather than a TIPC port ID So abort for 
>> TIPC_ERRINFO and TIPC_RETDATA*/
>>               if (anc->cmsg_type == TIPC_ERRINFO) {
>> -                /* TIPC_ERRINFO - TIPC error code associated with a 
>> returned data message or a connection termination message  so abort */
>> -                m_MDS_LOG_CRITICAL("MDTM: undelivered message 
>> condition ancillary data: TIPC_ERRINFO abort err :%s", 
>> strerror(errno) );
>> -                abort();
>> +                anc_data[0] = *((unsigned int*)(CMSG_DATA(anc) + 0));
>> +                if (anc_data[0] == TIPC_ERR_OVERLOAD) {
>> +                    LOG_CR("MDTM: undelivered message condition 
>> ancillary data: TIPC_ERR_OVERLOAD");
>> +                    m_MDS_LOG_CRITICAL("MDTM: undelivered message 
>> condition ancillary data: TIPC_ERR_OVERLOAD");
>> +                } else {
>> +                    /* TIPC_ERRINFO - TIPC error code associated 
>> with a returned data message or a connection termination message  so 
>> abort */
>> +                    LOG_CR("MDTM: undelivered message condition 
>> ancillary data: TIPC_ERRINFO abort err : %d", anc_data[0]);
>> +                    m_MDS_LOG_CRITICAL("MDTM: undelivered message 
>> condition ancillary data: TIPC_ERRINFO abort err : %d", anc_data[0]);
>> +                }
>>               } else if (anc->cmsg_type == TIPC_RETDATA) {
>> -                /* If we set TIPC_DEST_DROPPABLE off messge 
>> (configure TIPC to return rejected messages to the sender )
>> +                /* If we set TIPC_DEST_DROPPABLE off message 
>> (configure TIPC to return rejected messages to the sender )
>>                      we will hit this when we implement MDS 
>> retransmit lost messages  abort can be replaced with flow control 
>> logic*/
>>                   for (i = anc->cmsg_len - sizeof(*anc); i > 0; i--) {
>> -                    m_MDS_LOG_DBG("MDTM: returned byte 0x%02x\n", 
>> *cptr);
>> +                    LOG_CR("MDTM: returned byte 0x%02x\n", *cptr);
>> +                    m_MDS_LOG_CRITICAL("MDTM: returned byte 
>> 0x%02x\n", *cptr);
>>                       cptr++;
>>                   }
>>                   /* TIPC_RETDATA -The contents of a returned data 
>> message  so abort */
>> -                m_MDS_LOG_CRITICAL("MDTM: undelivered message 
>> condition ancillary data: TIPC_RETDATA abort err :%s", 
>> strerror(errno) );
>> -                abort();
>> +                LOG_CR("MDTM: undelivered message condition 
>> ancillary data: TIPC_RETDATA");
>> +                m_MDS_LOG_CRITICAL("MDTM: undelivered message 
>> condition ancillary data: TIPC_RETDATA");
>>               } else if (anc->cmsg_type == TIPC_DESTNAME) {
>>                   if (sz == 0) {
>>                       m_MDS_LOG_DBG("MDTM: recd bytes=0 on received 
>> on sock, abnormal/unknown  condition. Ignoring");
>


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to