Ack. See also the related ticket #442; I think it is also fixed by this patch. Thanks, Lennart
> -----Original Message----- > From: mathi.naic...@oracle.com [mailto:mathi.naic...@oracle.com] > Sent: den 15 april 2014 02:56 > To: Lennart Lund > Cc: opensaf-devel@lists.sourceforge.net > Subject: [PATCH 1 of 1] log: saflogger tool to honour try again [#839] > > osaf/tools/saflog/saflogger/saf_logger.c | 82 > +++++++++++++++++++++++++------ > 1 files changed, 66 insertions(+), 16 deletions(-) > > > Currently the saflogger tool does not honour > SA_AIS_ERR_TRY_AGAIN for the > saLogInitialize(), saLogStreamOpen_2(), saLogStreamClose(), > saLogFinalize() > APIs. This can create problems in the context of the overload > protection > scheme in the LOG Server and the upcoming #793 that provides for a > flow > control mechanism. The absence of a try again mechanism can lead to > discarded writes (of course, this depends on how the end user has integrated > the > saflogger tool) and other problems, e.g. it has been observed that > if a > TRY_AGAIN (after some time) is not attempted on streamOpen, it can lead > to other > serious problems. This patch does the following: > - Enables the saflogger tool to TRY_AGAIN for all the LOG APIs. > - Waits for 100 ms before trying again, until a worst case of 10 > seconds. > - Removes an undesired error message when try_again is hit. > - Avoids an indefinite loop for log Writes() and instead makes it re- > attempt > until a worst case of 10 seconds. > > diff --git a/osaf/tools/saflog/saflogger/saf_logger.c > b/osaf/tools/saflog/saflogger/saf_logger.c > --- a/osaf/tools/saflog/saflogger/saf_logger.c > +++ b/osaf/tools/saflog/saflogger/saf_logger.c > @@ -48,6 +48,11 @@ > #define DEFAULT_APP_LOG_FILE_SIZE 1024 > #define VENDOR_ID 193 > #define DEFAULT_MAX_FILES_ROTATED 4 > +/* Try for 10 seconds before giving up on an API */ #define TEN_SECONDS > +10*1000*1000 > +/* Sleep for 100 ms before retrying an API */ #define HUNDRED_MS > +100*1000 > +/* To the reviewer: Should we increase either of the above two > +timeperiod? 
*/ > > static void logWriteLogCallbackT(SaInvocationT invocation, SaAisErrorT > error); > > @@ -121,9 +126,9 @@ static SaAisErrorT write_log_record(SaLo > SaAisErrorT errorCode; > SaInvocationT invocation; > int i = 0; > - int try_agains = 0; > struct pollfd fds[1]; > int ret; > + unsigned int wait_time = 0; > > i++; > > @@ -131,13 +136,15 @@ static SaAisErrorT write_log_record(SaLo > > retry: > errorCode = saLogWriteLogAsync(logStreamHandle, invocation, > SA_LOG_RECORD_WRITE_ACK, logRecord); > - if (errorCode == SA_AIS_ERR_TRY_AGAIN) { > - usleep(100000); /* 100 ms */ > - try_agains++; > + if (errorCode == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > goto retry; > } > > if (errorCode != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogWriteLogAsync FAILED: %s\n", > saf_error(errorCode)); > return errorCode; > } > @@ -172,28 +179,25 @@ poll_retry: > return errorCode; > } > > - if (cb_error == SA_AIS_ERR_TRY_AGAIN) { > - usleep(100000); /* 100 ms */ > - try_agains++; > + if (cb_error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > goto retry; > } > > if (cb_error == SA_AIS_ERR_TIMEOUT) { > - usleep(100000); /* 100 ms */ > + usleep(HUNDRED_MS); > fprintf(stderr, "got SA_AIS_ERR_TIMEOUT, retry\n"); > goto retry; > } > > if (cb_error != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "logWriteLogCallbackT FAILED: %s\n", > saf_error(cb_error)); > return errorCode; > } > > - if (try_agains > 0) { > - fprintf(stderr, "got %u SA_AIS_ERR_TRY_AGAIN, waited %u > secs\n", try_agains, try_agains / 10); > - try_agains = 0; > - } > - > return errorCode; > } > > @@ -249,6 +253,7 @@ int main(int argc, char *argv[]) > SaLogHandleT logHandle; > SaLogStreamHandleT logStreamHandle; > SaSelectionObjectT 
selectionObject; > + unsigned int wait_time; > > srandom(getpid()); > > @@ -338,8 +343,17 @@ int main(int argc, char *argv[]) > logRecord.logBuffer = &logBuffer; > } > > + wait_time = 0; > error = saLogInitialize(&logHandle, &logCallbacks, &logVersion); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogInitialize(&logHandle, &logCallbacks, > &logVersion); > + } > + > if (error != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogInitialize FAILED: %s\n", > saf_error(error)); > exit(EXIT_FAILURE); > } > @@ -353,33 +367,69 @@ int main(int argc, char *argv[]) > /* Try open the stream before creating it. It might be a configured > app > * stream with other attributes than we have causing open with > default > * attributes to fail */ > + wait_time = 0; > error = saLogStreamOpen_2(logHandle, &logStreamName, NULL, 0, > SA_TIME_ONE_SECOND, &logStreamHandle); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogStreamOpen_2(logHandle, &logStreamName, > NULL, 0, > + SA_TIME_ONE_SECOND, > &logStreamHandle); > + } > > if (error == SA_AIS_ERR_NOT_EXIST) { > + wait_time = 0; > error = saLogStreamOpen_2(logHandle, &logStreamName, > logFileCreateAttributes, > logStreamOpenFlags, > SA_TIME_ONE_SECOND, &logStreamHandle); > - if (error != SA_AIS_OK) { > - fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", > saf_error(error)); > - exit(EXIT_FAILURE); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogStreamOpen_2(logHandle, > &logStreamName, logFileCreateAttributes, > + logStreamOpenFlags, > SA_TIME_ONE_SECOND, &logStreamHandle); > } > } > > + if (error != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > 
wait_time/1000000); > + fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", > saf_error(error)); > + exit(EXIT_FAILURE); > + } > + > if (write_log_record(logHandle, logStreamHandle, selectionObject, > &logRecord) != SA_AIS_OK) { > exit(EXIT_FAILURE); > } > > + wait_time = 0; > error = saLogStreamClose(logStreamHandle); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogStreamClose(logStreamHandle); > + } > + > if (SA_AIS_OK != error) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogStreamClose FAILED: %s\n", > saf_error(error)); > exit(EXIT_FAILURE); > } > > + wait_time = 0; > error = saLogFinalize(logHandle); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogFinalize(logHandle); > + } > + > if (SA_AIS_OK != error) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogFinalize FAILED: %s\n", > saf_error(error)); > exit(EXIT_FAILURE); > } > > exit(EXIT_SUCCESS); > } > + ------------------------------------------------------------------------------ Start Your Social Network Today - Download eXo Platform Build your Enterprise Intranet with eXo Platform Software Java Based Open Source Intranet - Social, Extensible, Cloud Ready Get Started Now And Turn Your Intranet Into A Collaboration Platform http://p.sf.net/sfu/ExoPlatform _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel