Ack with comments.

Regards, Vu

________________________________
From: Thien Minh Huynh <thien.m.hu...@dektech.com.au>
Sent: Friday, July 10, 2020 8:53 AM
To: Thuan Tran <thuan.t...@dektech.com.au>; Thang Duc Nguyen 
<thang.d.ngu...@dektech.com.au>; Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net <opensaf-devel@lists.sourceforge.net>; 
Thien Minh Huynh <thien.m.hu...@dektech.com.au>
Subject: [PATCH 1/1] saflogger: correct wait time of saflogger in resilience 
[#3198]

When system running in resilience, if timeout of saflogger is
configured larger than logResilienceTimeout.The saflogger will
be loop forever.
[Vu] my suggestion:
When log resilient mode is enabled and timeout of saflogger is
configured larger than logResilienceTimeout , the saflogger will
be blocked forever until the underlying filesystem is responsive.

The fix is adding a time tracker between start poll and receive
SA_AIS_ERR_TRY_AGAIN, that to correct the wait time.
---
 src/log/tools/saf_logger.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/log/tools/saf_logger.c b/src/log/tools/saf_logger.c
index e9f7e9b36..ac7f832b3 100644
--- a/src/log/tools/saf_logger.c
+++ b/src/log/tools/saf_logger.c
@@ -149,6 +149,9 @@ static SaAisErrorT write_log_record(SaLogHandleT logHandle,
         struct pollfd fds[1];
         int ret;
         unsigned int wait_time = 0;
+       int64_t poll_timeout = g_timeout * 1000;
+       int64_t start_time = 0;
+       int64_t last_time = 0;
[Vu] Append suffixes of time unit after those time variables. e.g.
 int64_t poll_timeout_ms = g_timeout * 1000;

         i++;

@@ -176,8 +179,13 @@ retry:
         fds[0].fd = (int)selectionObject;
         fds[0].events = POLLIN;

+       poll_timeout -= (last_time - start_time);
+       if (poll_timeout < 0)
+               poll_timeout = 0;
+       start_time = get_current_SaTime() / 1000000;

+
 poll_retry:
-       ret = poll(fds, 1, g_timeout*1000);
+       ret = poll(fds, 1, poll_timeout);

         if (ret == -1 && errno == EINTR)
                 goto poll_retry;
@@ -188,6 +196,10 @@ poll_retry:
         }

         if (ret == 0) {
+               if (poll_timeout < g_timeout * 1000) {
[Vu] 'g_timeout*1000' is calculated twice, consider creating a common variable.
e.g. max_poll_timeout_ms = g_timeout*1000;

+                       wait_time += poll_timeout * 1000;
+                       goto retry_timeout;
[Vu]
'Go to retry_timeout' is confusing me. How about the if .. else?
e.g.
if (ret == 0 && poll_timeout == g_timeout*1000) {
// poll timeout
} else if (ret == 0) {
// has re-tried sometimes but still gets failed
}

+               }
                 fprintf(stderr,
                         "poll timeout, message %u was most likely lost\n", i);
                 return SA_AIS_ERR_BAD_OPERATION;
@@ -209,7 +221,8 @@ poll_retry:
         if (cb_error == SA_AIS_ERR_TRY_AGAIN &&
             wait_time < g_timeout*ONE_SECOND_TO_NS) {
                 usleep(HUNDRED_MS);
-               wait_time += HUNDRED_MS;
+               last_time = get_current_SaTime() / 1000000;
+               wait_time += (last_time - start_time) * 1000;
[Vu] It may be better to name these constant variables to descriptive names
e.g.:
US_TO_NS = 1000 * 1000;
SECOND_TO_MS = 1000;


                 goto retry;
         }

@@ -219,6 +232,7 @@ poll_retry:
                 goto retry;
         }

+retry_timeout:
         if (cb_error != SA_AIS_OK) {
                 if (wait_time)
                         fprintf(stderr, "Waited for %u seconds.\n",
--
2.17.1


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to