[devel] [PATCH 1/1] mds: Fix mds flow control keep all messages in queue [#3123]

2019-11-27 Thread thuan.tran
When overflow happens, mds with flow control enabled may keep
all messages in queue if it fails to send a message when receiving
Nack or ChunkAck since no more trigger come after that.
MDS flow control should retry to send message in this scenario.
---
 src/mds/mds_tipc_fctrl_portid.cc | 47 ++--
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc
index 316e1ba75..d5314d5bc 100644
--- a/src/mds/mds_tipc_fctrl_portid.cc
+++ b/src/mds/mds_tipc_fctrl_portid.cc
@@ -17,6 +17,7 @@
 
 #include "mds/mds_tipc_fctrl_portid.h"
 #include "base/ncssysf_def.h"
+#include "base/osaf_time.h"
 
 #include "mds/mds_dt.h"
 #include "mds/mds_log.h"
@@ -149,23 +150,24 @@ void TipcPortId::FlushData() {
 
 uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) {
   struct sockaddr_tipc server_addr;
-  ssize_t send_len = 0;
-  uint32_t rc = NCSCC_RC_SUCCESS;
-
   memset(_addr, 0, sizeof(server_addr));
   server_addr.family = AF_TIPC;
   server_addr.addrtype = TIPC_ADDR_ID;
   server_addr.addr.id = id_;
-  send_len = sendto(bsrsock_, data, length, 0,
-(struct sockaddr *)_addr, sizeof(server_addr));
-
-  if (send_len == length) {
-rc = NCSCC_RC_SUCCESS;
-  } else {
-m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
-rc = NCSCC_RC_FAILURE;
+  int retry = 5;
+  while (retry >= 0) {
+ssize_t send_len = sendto(bsrsock_, data, length, 0,
+  (struct sockaddr *)_addr, sizeof(server_addr));
+
+if (send_len == length) {
+  return NCSCC_RC_SUCCESS;
+} else if (retry-- > 0) {
+  assert(errno == ENOMEM || errno == ENOBUFS);
+  osaf_nanosleep();
+}
   }
-  return rc;
+  m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
+  return NCSCC_RC_FAILURE;
 }
 
 uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length,
@@ -440,13 +442,16 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
chksize) {
 // try to send a few pending msg
 DataMessage* msg = nullptr;
 uint16_t send_msg_cnt = 0;
-while (send_msg_cnt++ < chunk_size_) {
+int retry = 0;
+while (send_msg_cnt < chunk_size_) {
   // find the lowest sequence unsent yet
   msg = sndqueue_.FirstUnsent();
   if (msg == nullptr) {
 break;
   } else {
   if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) 
{
+retry = 0;
+send_msg_cnt++;
 msg->is_sent_ = true;
 m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
 "SndQData[fseq:%u, len:%u], "
@@ -454,6 +459,12 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
chksize) {
 id_.node, id_.ref,
 msg->header_.fseq_, msg->header_.msg_len_,
 sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_);
+  } else if (send_msg_cnt == 0) {
+// If not retry, all messages are kept in queue
+// and no more trigger to send messages
+retry++;
+assert(retry < 100);
+continue;
   } else {
 break;
   }
@@ -508,9 +519,15 @@ void TipcPortId::ReceiveNack(uint32_t mseq, uint16_t mfrag,
   DataMessage* msg = sndqueue_.Find(Seq16(fseq));
   if (msg != nullptr) {
 // Resend the msg found
-if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) {
-  msg->is_sent_ = true;
+int retry = 0;
+while (Send(msg->msg_data_, msg->header_.msg_len_) != NCSCC_RC_SUCCESS) {
+  // If not retry, all messages are kept in queue
+  // and no more trigger to send messages
+  retry++;
+  assert(retry < 100);
+  continue;
 }
+msg->is_sent_ = true;
 m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
 "RsndData[mseq:%u, mfrag:%u, fseq:%u], "
 "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]",
-- 
2.17.1



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 0/1] Review Request for mds: Fix mds flow control keep all messages in queue [#3123] V3

2019-11-27 Thread thuan.tran
Summary: mds: Fix mds flow control keep all messages in queue [#3123]
Review request for Ticket(s): 3123
Peer Reviewer(s): Minh, Vu, Thang, Gary
Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE ***
Affected branch(es): develop
Development branch: ticket-3123
Base revision: 8e07c19aed63c249f4e7fa8470270d2de1a56046
Personal repository: git://git.code.sf.net/u/thuantr/review


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesn
 OpenSAF servicesn
 Core libraries  y
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-
N/A

revision 3a137597379e532f70afb79a7104b689f1bb79b3
Author: thuan.tran 
Date:   Thu, 28 Nov 2019 14:43:40 +0700

mds: Fix mds flow control keep all messages in queue [#3123]

When overflow happens, mds with flow control enabled may keep
all messages in queue if it fails to send a message when receiving
Nack or ChunkAck since no more trigger come after that.
MDS flow control should retry to send message in this scenario.



Complete diffstat:
--
 src/mds/mds_tipc_fctrl_portid.cc | 47 +++-
 1 file changed, 32 insertions(+), 15 deletions(-)


Testing Commands:
-
N/A

Testing, Expected Results:
--
N/A

Conditions of Submission:
-
ACK by reviewers

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc)

___ Your computer have a badly configured date and time; confusing the
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] mds: Fix mds flow control keep all messages in queue [#3123]

2019-11-27 Thread Tran Thuan
Hi Vu,

Thanks. See my reply inline.

Best Regards,
ThuanTr

-Original Message-
From: Nguyen Minh Vu  
Sent: Thursday, November 28, 2019 10:36 AM
To: thuan.tran ; 'Minh Hon Chau' 
; 'thang . d . nguyen' 
; gary@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] mds: Fix mds flow control keep all messages in queue 
[#3123]

Hi Thuan,

Ack with comments inline.

Regards, Vu

On 11/27/19 6:33 PM, thuan.tran wrote:
> When overflow happens, mds with flow control enabled may keep
> all messages in queue if it fails to send a message when receiving
> Nack or ChunkAck since no more trigger come after that.
> MDS flow control should retry to send message in this scenario.
> ---
>   src/mds/mds_tipc_fctrl_portid.cc | 39 +++-
>   1 file changed, 23 insertions(+), 16 deletions(-)
>
> diff --git a/src/mds/mds_tipc_fctrl_portid.cc 
> b/src/mds/mds_tipc_fctrl_portid.cc
> index 316e1ba75..8081e8bd4 100644
> --- a/src/mds/mds_tipc_fctrl_portid.cc
> +++ b/src/mds/mds_tipc_fctrl_portid.cc
> @@ -17,6 +17,7 @@
>   
>   #include "mds/mds_tipc_fctrl_portid.h"
>   #include "base/ncssysf_def.h"
> +#include "base/osaf_time.h"
>   
>   #include "mds/mds_dt.h"
>   #include "mds/mds_log.h"
> @@ -149,23 +150,23 @@ void TipcPortId::FlushData() {
>   
>   uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) {
> struct sockaddr_tipc server_addr;
> -  ssize_t send_len = 0;
> -  uint32_t rc = NCSCC_RC_SUCCESS;
> -
> memset(_addr, 0, sizeof(server_addr));
> server_addr.family = AF_TIPC;
> server_addr.addrtype = TIPC_ADDR_ID;
> server_addr.addr.id = id_;
> -  send_len = sendto(bsrsock_, data, length, 0,
> -(struct sockaddr *)_addr, sizeof(server_addr));
> -
> -  if (send_len == length) {
> -rc = NCSCC_RC_SUCCESS;
> -  } else {
> -m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
> -rc = NCSCC_RC_FAILURE;
> +  int retry = 5;
> +  while (retry >= 0) {
> +ssize_t send_len = sendto(bsrsock_, data, length, 0,
> +  (struct sockaddr *)_addr, sizeof(server_addr));
> +
[Vu] Any case the sendto just sends a part of data? if so, the retry if 
any should not start from the beginning of data.
the below code shows what i meant:

ssize_t byte_sent = 0;
while (retry--) {
   ssize_t send_len = sendto(bsrsock_, data + byte_sent, length - byte_sent, 0,
  (struct sockaddr *)_addr, sizeof(server_addr));
if (send_lenn == -1) {
  // error handling here
  if (errno == EINTR) continue;
  // error, can't continue. should log something here?
  return NCSCC_RC_FAILURE; // or assert?
}

// number of bytes sent
byte_sent += send_data;
 if (byte_sent >= length) {
   return NCSCC_RC_SUCCESS;
 }
 
 // retry but do we need to sleep here?
 osaf_nanosleep();
}
  
[Thuan]: I think there is no case send a part of message.
Even if yes, the incomplete message is not accept by receiver.
Receiver don't have reassemble for unfragmented message.

> +if (send_len == length) {
> +  return NCSCC_RC_SUCCESS;
> +} else if (retry-- > 0) {
> +  osaf_nanosleep();
> +}
> }
> -  return rc;
> +  m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
> +  return NCSCC_RC_FAILURE;
>   }
>   
>   uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length,
> @@ -440,13 +441,14 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, 
> uint16_t chksize) {
>   // try to send a few pending msg
>   DataMessage* msg = nullptr;
>   uint16_t send_msg_cnt = 0;
> -while (send_msg_cnt++ < chunk_size_) {
> +while (send_msg_cnt < chunk_size_) {
> // find the lowest sequence unsent yet
> msg = sndqueue_.FirstUnsent();
> if (msg == nullptr) {
>   break;
> } else {
> if (Send(msg->msg_data_, msg->header_.msg_len_) == 
> NCSCC_RC_SUCCESS) {
> +send_msg_cnt++;
>   msg->is_sent_ = true;
>   m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
>   "SndQData[fseq:%u, len:%u], "
> @@ -455,7 +457,9 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
> chksize) {
>   msg->header_.fseq_, msg->header_.msg_len_,
>   sndwnd_.acked_.v(), sndwnd_.send_.v(), 
> sndwnd_.nacked_space_);
> } else {
> -break;
> +// If not retry, all messages are kept in queue
> +// and no more trigger to send messages
> +continue;
[Vu] If send is constantly failed, this loop has no way to exit?
[Thuan] Yes
> }
> }
>   }
> @@ -508,9 +512,12 @@ void TipcPortId::ReceiveNack(uint32_t mseq, uint16_t 
> mfrag,
> DataMessage* msg = sndqueue_.Find(Seq16(fseq));
> if (msg != nullptr) {
>   // Resend the msg found
> -if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) {
> -  msg->is_sent_ = true;
> +while (Send(msg->msg_data_, 

Re: [devel] [PATCH 1/1] mds: Fix mds flow control keep all messages in queue [#3123]

2019-11-27 Thread Nguyen Minh Vu

Hi Thuan,

Ack with comments inline.

Regards, Vu

On 11/27/19 6:33 PM, thuan.tran wrote:

When overflow happens, mds with flow control enabled may keep
all messages in queue if it fails to send a message when receiving
Nack or ChunkAck since no more trigger come after that.
MDS flow control should retry to send message in this scenario.
---
  src/mds/mds_tipc_fctrl_portid.cc | 39 +++-
  1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc
index 316e1ba75..8081e8bd4 100644
--- a/src/mds/mds_tipc_fctrl_portid.cc
+++ b/src/mds/mds_tipc_fctrl_portid.cc
@@ -17,6 +17,7 @@
  
  #include "mds/mds_tipc_fctrl_portid.h"

  #include "base/ncssysf_def.h"
+#include "base/osaf_time.h"
  
  #include "mds/mds_dt.h"

  #include "mds/mds_log.h"
@@ -149,23 +150,23 @@ void TipcPortId::FlushData() {
  
  uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) {

struct sockaddr_tipc server_addr;
-  ssize_t send_len = 0;
-  uint32_t rc = NCSCC_RC_SUCCESS;
-
memset(_addr, 0, sizeof(server_addr));
server_addr.family = AF_TIPC;
server_addr.addrtype = TIPC_ADDR_ID;
server_addr.addr.id = id_;
-  send_len = sendto(bsrsock_, data, length, 0,
-(struct sockaddr *)_addr, sizeof(server_addr));
-
-  if (send_len == length) {
-rc = NCSCC_RC_SUCCESS;
-  } else {
-m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
-rc = NCSCC_RC_FAILURE;
+  int retry = 5;
+  while (retry >= 0) {
+ssize_t send_len = sendto(bsrsock_, data, length, 0,
+  (struct sockaddr *)_addr, sizeof(server_addr));
+
[Vu] Any case the sendto just sends a part of data? if so, the retry if 
any should not start from the beginning of data.

the below code shows what i meant:

ssize_t byte_sent = 0;
while (retry--) {
  ssize_t send_len = sendto(bsrsock_, data + byte_sent, length - byte_sent, 0,
 (struct sockaddr *)_addr, sizeof(server_addr));
   if (send_lenn == -1) {
 // error handling here
 if (errno == EINTR) continue;
 // error, can't continue. should log something here?
 return NCSCC_RC_FAILURE; // or assert?
   }
   
   // number of bytes sent

   byte_sent += send_data;
if (byte_sent >= length) {
  return NCSCC_RC_SUCCESS;
}

// retry but do we need to sleep here?

osaf_nanosleep();
}
 


+if (send_len == length) {
+  return NCSCC_RC_SUCCESS;
+} else if (retry-- > 0) {
+  osaf_nanosleep();
+}
}
-  return rc;
+  m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
+  return NCSCC_RC_FAILURE;
  }
  
  uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length,

@@ -440,13 +441,14 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
chksize) {
  // try to send a few pending msg
  DataMessage* msg = nullptr;
  uint16_t send_msg_cnt = 0;
-while (send_msg_cnt++ < chunk_size_) {
+while (send_msg_cnt < chunk_size_) {
// find the lowest sequence unsent yet
msg = sndqueue_.FirstUnsent();
if (msg == nullptr) {
  break;
} else {
if (Send(msg->msg_data_, msg->header_.msg_len_) == 
NCSCC_RC_SUCCESS) {
+send_msg_cnt++;
  msg->is_sent_ = true;
  m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
  "SndQData[fseq:%u, len:%u], "
@@ -455,7 +457,9 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
chksize) {
  msg->header_.fseq_, msg->header_.msg_len_,
  sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_);
} else {
-break;
+// If not retry, all messages are kept in queue
+// and no more trigger to send messages
+continue;

[Vu] If send is constantly failed, this loop has no way to exit?

}
}
  }
@@ -508,9 +512,12 @@ void TipcPortId::ReceiveNack(uint32_t mseq, uint16_t mfrag,
DataMessage* msg = sndqueue_.Find(Seq16(fseq));
if (msg != nullptr) {
  // Resend the msg found
-if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) {
-  msg->is_sent_ = true;
+while (Send(msg->msg_data_, msg->header_.msg_len_) != NCSCC_RC_SUCCESS) {
+  // If not retry, all messages are kept in queue
+  // and no more trigger to send messages
+  continue;

[Vu] If send is constantly failed, this loop has no way to exit?

  }
+msg->is_sent_ = true;
  m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
  "RsndData[mseq:%u, mfrag:%u, fseq:%u], "
  "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]",




___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/1] mds: Fix mds flow control keep all messages in queue [#3123]

2019-11-27 Thread thuan.tran
When overflow happens, mds with flow control enabled may keep
all messages in queue if it fails to send a message when receiving
Nack or ChunkAck since no more trigger come after that.
MDS flow control should retry to send message in this scenario.
---
 src/mds/mds_tipc_fctrl_portid.cc | 39 +++-
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc
index 316e1ba75..8081e8bd4 100644
--- a/src/mds/mds_tipc_fctrl_portid.cc
+++ b/src/mds/mds_tipc_fctrl_portid.cc
@@ -17,6 +17,7 @@
 
 #include "mds/mds_tipc_fctrl_portid.h"
 #include "base/ncssysf_def.h"
+#include "base/osaf_time.h"
 
 #include "mds/mds_dt.h"
 #include "mds/mds_log.h"
@@ -149,23 +150,23 @@ void TipcPortId::FlushData() {
 
 uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) {
   struct sockaddr_tipc server_addr;
-  ssize_t send_len = 0;
-  uint32_t rc = NCSCC_RC_SUCCESS;
-
   memset(_addr, 0, sizeof(server_addr));
   server_addr.family = AF_TIPC;
   server_addr.addrtype = TIPC_ADDR_ID;
   server_addr.addr.id = id_;
-  send_len = sendto(bsrsock_, data, length, 0,
-(struct sockaddr *)_addr, sizeof(server_addr));
-
-  if (send_len == length) {
-rc = NCSCC_RC_SUCCESS;
-  } else {
-m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
-rc = NCSCC_RC_FAILURE;
+  int retry = 5;
+  while (retry >= 0) {
+ssize_t send_len = sendto(bsrsock_, data, length, 0,
+  (struct sockaddr *)_addr, sizeof(server_addr));
+
+if (send_len == length) {
+  return NCSCC_RC_SUCCESS;
+} else if (retry-- > 0) {
+  osaf_nanosleep();
+}
   }
-  return rc;
+  m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno));
+  return NCSCC_RC_FAILURE;
 }
 
 uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length,
@@ -440,13 +441,14 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
chksize) {
 // try to send a few pending msg
 DataMessage* msg = nullptr;
 uint16_t send_msg_cnt = 0;
-while (send_msg_cnt++ < chunk_size_) {
+while (send_msg_cnt < chunk_size_) {
   // find the lowest sequence unsent yet
   msg = sndqueue_.FirstUnsent();
   if (msg == nullptr) {
 break;
   } else {
   if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) 
{
+send_msg_cnt++;
 msg->is_sent_ = true;
 m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
 "SndQData[fseq:%u, len:%u], "
@@ -455,7 +457,9 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t 
chksize) {
 msg->header_.fseq_, msg->header_.msg_len_,
 sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_);
   } else {
-break;
+// If not retry, all messages are kept in queue
+// and no more trigger to send messages
+continue;
   }
   }
 }
@@ -508,9 +512,12 @@ void TipcPortId::ReceiveNack(uint32_t mseq, uint16_t mfrag,
   DataMessage* msg = sndqueue_.Find(Seq16(fseq));
   if (msg != nullptr) {
 // Resend the msg found
-if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) {
-  msg->is_sent_ = true;
+while (Send(msg->msg_data_, msg->header_.msg_len_) != NCSCC_RC_SUCCESS) {
+  // If not retry, all messages are kept in queue
+  // and no more trigger to send messages
+  continue;
 }
+msg->is_sent_ = true;
 m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], "
 "RsndData[mseq:%u, mfrag:%u, fseq:%u], "
 "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]",
-- 
2.17.1



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 0/1] Review Request for mds: Fix mds flow control keep all messages in queue [#3123] V2

2019-11-27 Thread thuan.tran
Summary: mds: Fix mds flow control keep all messages in queue [#3123]
Review request for Ticket(s): 3123
Peer Reviewer(s): Minh, Vu, Thang, Gary
Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE ***
Affected branch(es): develop
Development branch: ticket-3123
Base revision: 8e07c19aed63c249f4e7fa8470270d2de1a56046
Personal repository: git://git.code.sf.net/u/thuantr/review


Impacted area   Impact y/n

 Docsn
 Build systemn
 RPM/packaging   n
 Configuration files n
 Startup scripts n
 SAF servicesn
 OpenSAF servicesn
 Core libraries  y
 Samples n
 Tests   n
 Other   n


Comments (indicate scope for each "y" above):
-
N/A

revision 0e47c44a9977fb1a047763a484655582c2687762
Author: thuan.tran 
Date:   Wed, 27 Nov 2019 18:24:20 +0700

mds: Fix mds flow control keep all messages in queue [#3123]

When overflow happens, mds with flow control enabled may keep
all messages in queue if it fails to send a message when receiving
Nack or ChunkAck since no more trigger come after that.
MDS flow control should retry to send message in this scenario.



Complete diffstat:
--
 src/mds/mds_tipc_fctrl_portid.cc | 39 +++
 1 file changed, 23 insertions(+), 16 deletions(-)


Testing Commands:
-
N/A

Testing, Expected Results:
--
N/A

Conditions of Submission:
-
ACK by reviewers

Arch  Built StartedLinux distro
---
mipsn  n
mips64  n  n
x86 n  n
x86_64  y  y
powerpc n  n
powerpc64   n  n


Reviewer Checklist:
---
[Submitters: make sure that your review doesn't trigger any checkmarks!]


Your checkin has not passed review because (see checked entries):

___ Your RR template is generally incomplete; it has too many blank entries
that need proper data filled in.

___ You have failed to nominate the proper persons for review and push.

___ Your patches do not have proper short+long header

___ You have grammar/spelling in your header that is unacceptable.

___ You have exceeded a sensible line length in your headers/comments/text.

___ You have failed to put in a proper Trac Ticket # into your commits.

___ You have incorrectly put/left internal data in your comments/files
(i.e. internal bug tracking tool IDs, product names etc)

___ You have not given any evidence of testing beyond basic build tests.
Demonstrate some level of runtime or other sanity testing.

___ You have ^M present in some of your files. These have to be removed.

___ You have needlessly changed whitespace or added whitespace crimes
like trailing spaces, or spaces before tabs.

___ You have mixed real technical changes with whitespace and other
cosmetic code cleanup changes. These have to be separate commits.

___ You need to refactor your submission into logical chunks; there is
too much content into a single commit.

___ You have extraneous garbage in your review (merge commits etc)

___ You have giant attachments which should never have been sent;
Instead you should place your content in a public tree to be pulled.

___ You have too many commits attached to an e-mail; resend as threaded
commits, or place in a public tree for a pull.

___ You have resent this content multiple times without a clear indication
of what has changed between each re-send.

___ You have failed to adequately and individually address all of the
comments and change requests that were proposed in the initial review.

___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc)

___ Your computer have a badly configured date and time; confusing the
the threaded patch review.

___ Your changes affect IPC mechanism, and you don't present any results
for in-service upgradability test.

___ Your changes affect user manual and documentation, your patch series
do not contain the patch that updates the Doxygen manual.



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 2/2] mds: Avoid message reallocation [#3089]

2019-11-27 Thread Minh Hon Chau

Hi Thuan,

We should free() the memory at the same function level where the memory 
is allocated. The @buffer is passed to mdtm_sendto() could be from a 
stack memory (as it is used to be before this patch).


Thanks

Minh

On 27/11/19 5:40 pm, Tran Thuan wrote:

Hi Minh,

Why not free() inside mdtm_sendto() and mdtm_mcast_sendto()?
It will help reduce much code change.

Best Regards,
ThuanTr

-Original Message-
From: Minh Chau 
Sent: Tuesday, November 26, 2019 7:02 PM
To: thuan.t...@dektech.com.au; vu.m.ngu...@dektech.com.au; 
gary@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net; Minh Chau 
Subject: [PATCH 2/2] mds: Avoid message reallocation [#3089]

The patch avoids message reallocation if the message is in
retransmission queue
---
  src/mds/mds_dt_tipc.c| 42 +++-
  src/mds/mds_tipc_fctrl_intf.cc   |  6 --
  src/mds/mds_tipc_fctrl_intf.h|  4 ++--
  src/mds/mds_tipc_fctrl_msg.cc|  2 +-
  src/mds/mds_tipc_fctrl_portid.cc |  9 +++--
  5 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c
index 16cf11b..866c370 100644
--- a/src/mds/mds_dt_tipc.c
+++ b/src/mds/mds_dt_tipc.c
@@ -120,7 +120,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req);
  
  /* Tipc actual send, can be made as Macro even*/

  static uint32_t mdtm_sendto(uint8_t *buffer, uint16_t buff_len,
-   struct tipc_portid tipc_id);
+   struct tipc_portid tipc_id, uint8_t *is_queued);
  static uint32_t mdtm_mcast_sendto(void *buffer, size_t size,
  const MDTM_SEND_REQ *req);
  
@@ -2643,7 +2643,8 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req)

if (req->snd_type == MDS_SENDTYPE_ACK ||
req->snd_type == MDS_SENDTYPE_RACK) {
uint8_t len = mds_and_mdtm_hdr_len;
-   uint8_t buffer_ack[len];
+   uint8_t *buffer_ack = calloc(1, len);
+   uint8_t is_queued = 0;
  
  			/* Add mds_hdr */

if (mdtm_add_mds_hdr(buffer_ack, req)
@@ -2657,18 +2658,24 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req)
_seq_num) == NCSCC_RC_FAILURE){
m_MDS_LOG_ERR("FCTRL: Failed to send message"
" len :%d", len);
+   free(buffer_ack);
return NCSCC_RC_FAILURE;
}
/* Add frag_hdr */
if (mdtm_add_frag_hdr(buffer_ack, len, frag_seq_num,
0, fctrl_seq_num) != NCSCC_RC_SUCCESS) {
+   free(buffer_ack);
return NCSCC_RC_FAILURE;
}
  
  			m_MDS_LOG_DBG("MDTM:Sending message with Service"

" Seqno=%d, TO Dest_Tipc_id=<0x%08x:%u> ",
req->svc_seq_num, tipc_id.node, tipc_id.ref);
-   return mdtm_sendto(buffer_ack, len, tipc_id);
+   status = mdtm_sendto(buffer_ack, len, tipc_id,
+   _queued);
+   if (is_queued == 0)
+   free(buffer_ack);
+   return status;
}
  
  		if (req->msg.encoding == MDS_ENC_TYPE_FLAT) {

@@ -2730,6 +2737,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req)
} else {
uint8_t *p8;
uint8_t *body = NULL;
+   uint8_t is_queued = 0;
  
  body = calloc(1, len +

mds_and_mdtm_hdr_len);
@@ -2824,7 +2832,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req)
return NCSCC_RC_FAILURE;
}
} else {
-   if (mdtm_sendto(body, len, tipc_id)
+   if (mdtm_sendto(body, len, tipc_id, 
_queued)
!= NCSCC_RC_SUCCESS) {
m_MDS_LOG_ERR("MDTM: Unable to"
" send the msg thru"
@@ -2835,7 +2843,8 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req)
}
}
m_MMGR_FREE_BUFR_LIST(usrbuf);
-   free(body);
+   if (is_queued == 0)
+   free(body);
return NCSCC_RC_SUCCESS;
}
} break;
@@ -2864,6 +2873,7 @@ uint32_t