osaf/services/saf/logsv/lgs/lgs_evt.c | 11 +- osaf/services/saf/logsv/lgs/lgs_file.c | 197 +++++++++++++++++++++-------- osaf/services/saf/logsv/lgs/lgs_filehdl.c | 19 +- osaf/services/saf/logsv/lgs/lgs_imm.c | 1 - osaf/services/saf/logsv/lgs/lgs_mbcsv.c | 7 - osaf/services/saf/logsv/lgs/lgs_mbcsv.h | 3 - osaf/services/saf/logsv/lgs/lgs_stream.c | 126 ++++++++---------- osaf/services/saf/logsv/lgs/lgs_stream.h | 1 - 8 files changed, 219 insertions(+), 146 deletions(-)
- Remove unnecessary data copying in log_file_api() and file_hndl_thread() - Return SA_AIS_ERR_TIMEOUT if the write operation time out when a log record shall be written. If the file thread is already "hanging" when a write is requested no attempt to write is made and SA_AIS_ERR_TRY_AGAIN is returned as before. - Try to recover file thread by recreating it if it hangs for a long time. - Recover if bad file descriptor or stale NFS handle. - Always reinitialize/reopen log files if a write operation fails, timeout of file thread (hanging file system) included. - Handle synchronization between nodes when log files cannot be created before a switch over without using any new flag that has to be checkpointed (remove "files_initialized" flag) - Incorrect handling of "partial write" is fixed. See #536 - Open log files with O_NONBLOCK. Answer client with AIS_ERR_TIMEOUT if EWOULDBLOCK/EAGAIN (record may be parially written) diff --git a/osaf/services/saf/logsv/lgs/lgs_evt.c b/osaf/services/saf/logsv/lgs/lgs_evt.c --- a/osaf/services/saf/logsv/lgs/lgs_evt.c +++ b/osaf/services/saf/logsv/lgs/lgs_evt.c @@ -707,8 +707,6 @@ static uint32_t lgs_ckpt_stream_open(lgs ckpt.ckpt_rec.stream_open.streamType = logStream->streamType; ckpt.ckpt_rec.stream_open.logRecordId = logStream->logRecordId; - - ckpt.ckpt_rec.stream_open.files_initiated = logStream->files_initiated; async_rc = lgs_ckpt_send_async(cb, &ckpt, NCS_MBCSV_ACT_ADD); if (async_rc == NCSCC_RC_SUCCESS) { @@ -1023,7 +1021,7 @@ static uint32_t proc_write_log_async_msg SaAisErrorT error = SA_AIS_OK; SaStringT logOutputString = NULL; SaUint32T buf_size; - int n; + int n, rc; TRACE_ENTER2("client_id %u, stream ID %u", param->client_id, param->lstr_id); @@ -1064,9 +1062,13 @@ static uint32_t proc_write_log_async_msg goto done; } - if (log_stream_write_h(stream, logOutputString, n) == -1) { + rc = log_stream_write_h(stream, logOutputString, n); + if (rc == -1) { error = SA_AIS_ERR_TRY_AGAIN; goto done; + } else if (rc == -2) { + error = SA_AIS_ERR_TIMEOUT; + goto done; } /* @@ -1084,7 +1086,6 @@ static uint32_t proc_write_log_async_msg ckpt.ckpt_rec.write_log.streamId = stream->streamId; ckpt.ckpt_rec.write_log.curFileSize = stream->curFileSize; ckpt.ckpt_rec.write_log.logFileCurrent = stream->logFileCurrent; - ckpt.ckpt_rec.write_log.files_initiated = stream->files_initiated; (void)lgs_ckpt_send_async(cb, &ckpt, NCS_MBCSV_ACT_ADD); } diff --git a/osaf/services/saf/logsv/lgs/lgs_file.c b/osaf/services/saf/logsv/lgs/lgs_file.c --- a/osaf/services/saf/logsv/lgs/lgs_file.c +++ b/osaf/services/saf/logsv/lgs/lgs_file.c @@ -34,7 +34,10 @@ #include "osaf_utility.h" +/* Max time to wait for file thread to finish */ #define MAX_WAITTIME_ms 500 /* ms */ +/* Max time to wait for hanging file thread before recovery */ +#define MAX_RECOVERYTIME_s 600 /* s */ #define GETTIME(x) osafassert(clock_gettime(CLOCK_REALTIME, &x) == 0); static pthread_mutex_t lgs_ftcom_mutex; /* For locking communication */ @@ -47,12 +50,14 @@ struct file_communicate { bool timeout_f; /* True if API has got a timeout. Thread shall not answer */ lgsf_treq_t request_code; /* Request code from API */ int return_code; /* Return code from handlers */ - size_t indata_size; - void *indata; /* In-parameters for handlers */ + void *indata_ptr; /* In-parameters for handlers */ size_t outdata_size; - void *outdata; /* Out data from handlers */ + void *outdata_ptr; /* Out data from handlers */ }; +/* Used for synchronizing and transfer of data ownership between main thread + * and file thread. + */ static struct file_communicate lgs_com_data = { .answer_f = false, .request_f = false, @@ -60,10 +65,17 @@ static struct file_communicate lgs_com_d .return_code = LGSF_NORETC }; +static pthread_t file_thread_id; +static struct timespec ftr_start_time; /* Start time used for file thread recovery */ +static bool ftr_started_flag = false; /* Set to true if thread is hanging */ + /***************************************************************************** * Utility functions *****************************************************************************/ +static int start_file_thread(void); +static void remove_file_thread(void); + /** * Creates absolute time to use with pthread_cond_timedwait. * @@ -88,6 +100,42 @@ static void get_timeout_time(struct time timeout_time->tv_nsec = (millisec2 % 1000) * 1000000; } +/** + * Checks if time to recover the file thread. If timeout do the recovery + * Global variables: + * ftr_start_time; Time saved when file thread was timed out. + * ftr_start_flag; Set to true when recovery timeout shall be measured. + * + */ +static void ft_check_recovery(void) +{ + struct timespec end_time; + uint64_t stime_ms, etime_ms, dtime_ms; + int rc; + + TRACE_ENTER2("ftr_started_flag = %d",ftr_started_flag); + if (ftr_started_flag == true) { + /* Calculate elapsed time */ + GETTIME(end_time); + stime_ms = (ftr_start_time.tv_sec * 1000) + (ftr_start_time.tv_nsec / 1000000); + etime_ms = (end_time.tv_sec * 1000) + (end_time.tv_nsec / 1000000); + dtime_ms = etime_ms - stime_ms; + + TRACE("dtime_ms = %ld",dtime_ms); + + if (dtime_ms >= (MAX_RECOVERYTIME_s * 1000)) { + TRACE("Recovering file thread"); + remove_file_thread(); + rc = start_file_thread(); + if (rc) { + LOG_ER("File thread could not be recovered. Exiting..."); + _Exit(EXIT_FAILURE); + } + } + } + TRACE_LEAVE(); +} + /***************************************************************************** * Thread handling *****************************************************************************/ @@ -106,9 +154,7 @@ static void *file_hndl_thread(void *nopa { int rc = 0; int hndl_rc = 0; - void *inbuf; - void *outbuf; - uint32_t max_outsize; + int dummy; //#define LLD_DELAY_TST #ifdef LLD_DELAY_TST /* Make "file system" hang for n sec after start */ @@ -117,6 +163,9 @@ static void *file_hndl_thread(void *nopa #endif TRACE("%s - is started",__FUNCTION__); + /* Configure cancellation so that thread can be canceled at any time */ + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &dummy); + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &dummy); osaf_mutex_lock_ordie(&lgs_ftcom_mutex); /* LOCK */ while(1) { @@ -126,21 +175,6 @@ static void *file_hndl_thread(void *nopa if (rc != 0) osaf_abort(rc); } else { - /* Handle communication buffer */ - if (lgs_com_data.indata_size != 0) { - inbuf = malloc(lgs_com_data.indata_size); - memcpy(inbuf, lgs_com_data.indata, lgs_com_data.indata_size); - } else { - inbuf = NULL; - } - - if (lgs_com_data.outdata_size != 0) { - outbuf = malloc(lgs_com_data.outdata_size); - } else { - outbuf = NULL; - } - max_outsize = lgs_com_data.outdata_size; - /* Handle the request. * A handler is handling file operations that may 'hang'. Therefore * the mutex cannot be locked since that may cause the main thread @@ -162,34 +196,44 @@ static void *file_hndl_thread(void *nopa /* Invoke requested handler function */ switch (lgs_com_data.request_code) { case LGSF_FILEOPEN: - hndl_rc = fileopen_hdl(inbuf, outbuf, max_outsize); + hndl_rc = fileopen_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_FILECLOSE: - hndl_rc = fileclose_hdl(inbuf, outbuf, max_outsize); + hndl_rc = fileclose_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_DELETE_FILE: - hndl_rc = delete_file_hdl(inbuf, outbuf, max_outsize); + hndl_rc = delete_file_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_GET_NUM_LOGFILES: - hndl_rc = get_number_of_log_files_hdl(inbuf, outbuf, max_outsize); + hndl_rc = get_number_of_log_files_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_MAKELOGDIR: - hndl_rc = make_log_dir_hdl(inbuf, outbuf, max_outsize); + hndl_rc = make_log_dir_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_WRITELOGREC: - hndl_rc = write_log_record_hdl(inbuf, outbuf, max_outsize); + hndl_rc = write_log_record_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_CREATECFGFILE: - hndl_rc = create_config_file_hdl(inbuf, outbuf, max_outsize); + hndl_rc = create_config_file_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_RENAME_FILE: - hndl_rc = rename_file_hdl(inbuf, outbuf, max_outsize); + hndl_rc = rename_file_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_CHECKPATH: - hndl_rc = check_path_exists_hdl(inbuf, outbuf, max_outsize); + hndl_rc = check_path_exists_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); break; case LGSF_CHECKDIR: - hndl_rc = path_is_writeable_dir_hdl(inbuf, outbuf, max_outsize); + hndl_rc = path_is_writeable_dir_hdl(lgs_com_data.indata_ptr, + lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); default: break; } @@ -206,24 +250,15 @@ static void *file_hndl_thread(void *nopa */ lgs_com_data.request_f = false; /* Prepare to take a new request */ lgs_com_data.request_code = LGSF_NOREQ; - free(inbuf); /* The following cannot be done if the API has timed out */ if (lgs_com_data.timeout_f == false) { lgs_com_data.answer_f = true; lgs_com_data.return_code = hndl_rc; - if (lgs_com_data.outdata_size != 0) { - memcpy(lgs_com_data.outdata, outbuf, lgs_com_data.outdata_size); - free(outbuf); - } else { - lgs_com_data.outdata = NULL; - } /* Signal the API function that we are done */ rc = pthread_cond_signal(&answer_cv); if (rc != 0) osaf_abort(rc); - } else { - free(outbuf); } } } /* End while(1) */ @@ -238,7 +273,6 @@ static int start_file_thread(void) { int rc = 0; int tbd_inpar=1; - pthread_t thread; TRACE_ENTER(); @@ -261,7 +295,7 @@ static int start_file_thread(void) /* Create thread. */ - rc = pthread_create(&thread, NULL, file_hndl_thread, (void *) &tbd_inpar); + rc = pthread_create(&file_thread_id, NULL, file_hndl_thread, (void *) &tbd_inpar); if (rc != 0) { LOG_ER("pthread_create fail %s",strerror(errno)); goto done; @@ -273,6 +307,44 @@ done: } /** + * Remove and cleanup file thread + */ +static void remove_file_thread(void) +{ + int rc; + + TRACE_ENTER(); + + /* Remove the thread */ + rc = pthread_cancel(file_thread_id); + if (rc) { + TRACE("pthread_cancel fail - %s",strerror(rc)); + } + + /* Cleanup mutex and conditions */ + rc = pthread_cond_destroy(&request_cv); + if (rc) { + TRACE("pthread_cond_destroy, request_cv fail - %s",strerror(rc)); + } + rc = pthread_cond_destroy(&answer_cv); + if (rc) { + TRACE("pthread_cond_destroy, answer_cv fail - %s",strerror(rc)); + } + rc = pthread_mutex_destroy(&lgs_ftcom_mutex); + if (rc) { + TRACE("pthread_cond_destroy, answer_cv fail - %s",strerror(rc)); + } + + /* Clean up thread synchronization */ + lgs_com_data.answer_f = false; + lgs_com_data.request_f = false; + lgs_com_data.request_code = LGSF_NOREQ; + lgs_com_data.return_code = LGSF_NORETC; + + TRACE_LEAVE(); +} + +/** * Initialize threaded file handling */ uint32_t lgs_file_init(void) @@ -300,7 +372,8 @@ lgsf_retcode_t log_file_api(lgsf_apipar_ { lgsf_retcode_t api_rc = LGSF_SUCESS; int rc = 0; - struct timespec timeout_time, start_time, end_time; + struct timespec timeout_time; + struct timespec m_start_time, m_end_time; uint64_t stime_ms, etime_ms, dtime_ms; TRACE_ENTER(); @@ -319,17 +392,16 @@ lgsf_retcode_t log_file_api(lgsf_apipar_ /* Enter data for a request */ lgs_com_data.request_code = apipar_in->req_code_in; if (apipar_in->data_in_size != 0) { - lgs_com_data.indata = malloc(apipar_in->data_in_size); - memcpy(lgs_com_data.indata, apipar_in->data_in, apipar_in->data_in_size); + lgs_com_data.indata_ptr = malloc(apipar_in->data_in_size); + memcpy(lgs_com_data.indata_ptr, apipar_in->data_in, apipar_in->data_in_size); } else { - lgs_com_data.indata = NULL; + lgs_com_data.indata_ptr = NULL; } - lgs_com_data.indata_size = apipar_in->data_in_size; if (apipar_in->data_out_size != 0) { - lgs_com_data.outdata = malloc(apipar_in->data_out_size); + lgs_com_data.outdata_ptr = malloc(apipar_in->data_out_size); } else { - lgs_com_data.outdata = NULL; + lgs_com_data.outdata_ptr = NULL; } lgs_com_data.outdata_size = apipar_in->data_out_size; @@ -341,7 +413,7 @@ lgsf_retcode_t log_file_api(lgsf_apipar_ if (rc != 0) osaf_abort(rc); /* Wait for an answer */ - GETTIME(start_time); /* Used for TRACE of print of time to answer */ + GETTIME(m_start_time); /* Used for TRACE of print of time to answer */ get_timeout_time(&timeout_time, MAX_WAITTIME_ms); @@ -352,6 +424,9 @@ lgsf_retcode_t log_file_api(lgsf_apipar_ TRACE("Timed out before answer"); api_rc = LGSF_TIMEOUT; lgs_com_data.timeout_f = true; /* Inform thread about timeout */ + /* Set start time for thread recovery timeout */ + GETTIME(ftr_start_time); + ftr_started_flag = true; /* Switch on timeout check */ goto done; } else if (rc != 0) { TRACE("pthread wait Failed - %s",strerror(rc)); @@ -366,12 +441,12 @@ lgsf_retcode_t log_file_api(lgsf_apipar_ * the returned data. */ apipar_in->hdl_ret_code_out = lgs_com_data.return_code; - memcpy(apipar_in->data_out, lgs_com_data.outdata, lgs_com_data.outdata_size); + memcpy(apipar_in->data_out, lgs_com_data.outdata_ptr, lgs_com_data.outdata_size); /* Measure answer time for TRACE */ - GETTIME(end_time); - stime_ms = (start_time.tv_sec * 1000) + (start_time.tv_nsec / 1000000); - etime_ms = (end_time.tv_sec * 1000) + (end_time.tv_nsec / 1000000); + GETTIME(m_end_time); + stime_ms = (m_start_time.tv_sec * 1000) + (m_start_time.tv_nsec / 1000000); + etime_ms = (m_end_time.tv_sec * 1000) + (m_end_time.tv_nsec / 1000000); dtime_ms = etime_ms - stime_ms; TRACE("Time waited for answer %ld ms",dtime_ms); @@ -379,13 +454,23 @@ lgsf_retcode_t log_file_api(lgsf_apipar_ lgs_com_data.answer_f = false; lgs_com_data.return_code = LGSF_NORETC; + /* We are not hanging. Switch off recovery timer if armed */ + ftr_started_flag = false; + done: /* Prepare for new request/answer cycle */ - if (lgs_com_data.indata != NULL) free(lgs_com_data.indata); - if (lgs_com_data.outdata != NULL) free(lgs_com_data.outdata); + if (lgs_com_data.indata_ptr != NULL) free(lgs_com_data.indata_ptr); + if (lgs_com_data.outdata_ptr != NULL) free(lgs_com_data.outdata_ptr); api_exit: osaf_mutex_unlock_ordie(&lgs_ftcom_mutex); /* UNLOCK */ + /* If thread is hanging, check for how long time it has been hanging + * by reading time and compare with start time for hanging. + * If too long reset thread. Note: This must be done here after the mutex + * is unlocked. + */ + ft_check_recovery(); + TRACE_LEAVE(); return api_rc; } diff --git a/osaf/services/saf/logsv/lgs/lgs_filehdl.c b/osaf/services/saf/logsv/lgs/lgs_filehdl.c --- a/osaf/services/saf/logsv/lgs/lgs_filehdl.c +++ b/osaf/services/saf/logsv/lgs/lgs_filehdl.c @@ -251,7 +251,7 @@ int write_log_record_hdl(void *indata, v } else { /* Handle partial writes */ bytes_written += rc; - if (bytes_written < params_in->fixedLogRecordSize) + if (bytes_written < params_in->record_size) goto retry; } @@ -379,7 +379,13 @@ done: } /** - * Open/create a file for append + * Open/create a file for append in non blocking mode. + * Note: The file is opened in NONBLOCK mode directly. This makes it possible + * that the open succeeds but the following write will fail. To avoid + * this the file can be opened without the O_NONBLOCK flag and set this + * flag using fcntl(). But write handling is done so that if a write + * fails the log file will always be reopened. + * * @param indata[in], Null-terminated string containing filename to open * @param outdata[out], int errno, 0 if no error * @param max_outsize[in], always sizeof(int) @@ -387,18 +393,19 @@ done: */ int fileopen_hdl(void *indata, void *outdata, size_t max_outsize) { - int fd_out; int errno_save = 0; char *filepath = (char *) indata; int *errno_out_p = (int *) outdata; + int fd; TRACE_ENTER(); TRACE("%s - filepath \"%s\"",__FUNCTION__,filepath); open_retry: - fd_out = open(filepath, O_CREAT | O_RDWR | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP); + fd = open(filepath, O_CREAT | O_RDWR | O_APPEND | O_NONBLOCK, + S_IRUSR | S_IWUSR | S_IRGRP); - if (fd_out == -1) { + if (fd == -1) { if (errno == EINTR) goto open_retry; /* save errno for caller logging */ @@ -410,7 +417,7 @@ open_retry: *errno_out_p = errno_save; TRACE_LEAVE(); - return fd_out; + return fd; } /** diff --git a/osaf/services/saf/logsv/lgs/lgs_imm.c b/osaf/services/saf/logsv/lgs/lgs_imm.c --- a/osaf/services/saf/logsv/lgs/lgs_imm.c +++ b/osaf/services/saf/logsv/lgs/lgs_imm.c @@ -150,7 +150,6 @@ static uint32_t ckpt_stream(log_stream_t ckpt.ckpt_rec.stream_cfg.logFileFormat = stream->logFileFormat; ckpt.ckpt_rec.stream_cfg.severityFilter = stream->severityFilter; ckpt.ckpt_rec.stream_cfg.logFileCurrent = stream->logFileCurrent; - ckpt.ckpt_rec.stream_cfg.files_initiated = stream->files_initiated; rc = lgs_ckpt_send_async(lgs_cb, &ckpt, NCS_MBCSV_ACT_ADD); diff --git a/osaf/services/saf/logsv/lgs/lgs_mbcsv.c b/osaf/services/saf/logsv/lgs/lgs_mbcsv.c --- a/osaf/services/saf/logsv/lgs/lgs_mbcsv.c +++ b/osaf/services/saf/logsv/lgs/lgs_mbcsv.c @@ -449,7 +449,6 @@ uint32_t lgs_ckpt_stream_open_set(log_st stream_open->numOpeners = logStream->numOpeners; stream_open->streamType = logStream->streamType; stream_open->logRecordId = logStream->logRecordId; - stream_open->files_initiated = logStream->files_initiated; return NCSCC_RC_SUCCESS; } @@ -1094,7 +1093,6 @@ static uint32_t ckpt_proc_log_write(lgs_ stream->logRecordId = param->recordId; stream->curFileSize = param->curFileSize; strcpy(stream->logFileCurrent, param->logFileCurrent); - stream->files_initiated = param->files_initiated; done: free_edu_mem(param->logFileCurrent); @@ -1196,7 +1194,6 @@ uint32_t ckpt_proc_open_stream(lgs_cb_t stream->numOpeners = param->numOpeners; stream->creationTimeStamp = param->creationTimeStamp; strcpy(stream->logFileCurrent, param->logFileCurrent); - stream->files_initiated = param->files_initiated; } log_stream_print(stream); @@ -1320,7 +1317,6 @@ static uint32_t ckpt_proc_cfg_stream(lgs strcpy(stream->logFileFormat, param->logFileFormat); stream->severityFilter = param->severityFilter; strcpy(stream->logFileCurrent, param->logFileCurrent); - stream->files_initiated = param->files_initiated; done: /* Free strings allocated by the EDU encoder */ @@ -1569,7 +1565,6 @@ static uint32_t edp_ed_write_rec(EDU_HDL {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_write_log_t *)0)->streamId, 0, NULL}, {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_write_log_t *)0)->curFileSize, 0, NULL}, {EDU_EXEC, ncs_edp_string, 0, 0, 0, (long)&((lgs_ckpt_write_log_t *)0)->logFileCurrent, 0, NULL}, - {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_write_log_t *)0)->files_initiated, 0, NULL}, {EDU_END, 0, 0, 0, 0, 0, 0, NULL}, }; @@ -1636,7 +1631,6 @@ static uint32_t edp_ed_open_stream_rec(E {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_stream_open_t *)0)->numOpeners, 0, NULL}, {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_stream_open_t *)0)->streamType, 0, NULL}, {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_stream_open_t *)0)->logRecordId, 0, NULL}, - {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_stream_open_t *)0)->files_initiated, 0, NULL}, {EDU_END, 0, 0, 0, 0, 0, 0, NULL}, }; @@ -1804,7 +1798,6 @@ static uint32_t edp_ed_cfg_stream_rec(ED {EDU_EXEC, ncs_edp_string, 0, 0, 0, (long)&((lgs_ckpt_stream_cfg_t *)0)->logFileFormat, 0, NULL}, {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_stream_cfg_t *)0)->severityFilter, 0, NULL}, {EDU_EXEC, ncs_edp_string, 0, 0, 0, (long)&((lgs_ckpt_stream_cfg_t *)0)->logFileCurrent, 0, NULL}, - {EDU_EXEC, ncs_edp_uns32, 0, 0, 0, (long)&((lgs_ckpt_stream_cfg_t *)0)->files_initiated , 0, NULL}, {EDU_END, 0, 0, 0, 0, 0, 0, NULL}, }; diff --git a/osaf/services/saf/logsv/lgs/lgs_mbcsv.h b/osaf/services/saf/logsv/lgs/lgs_mbcsv.h --- a/osaf/services/saf/logsv/lgs/lgs_mbcsv.h +++ b/osaf/services/saf/logsv/lgs/lgs_mbcsv.h @@ -60,7 +60,6 @@ typedef struct { uint32_t recordId; uint32_t curFileSize; char *logFileCurrent; - bool files_initiated; } lgs_ckpt_write_log_t; typedef struct { @@ -81,7 +80,6 @@ typedef struct { char *logFileCurrent; logStreamTypeT streamType; uint32_t logRecordId; /* log record indentifier increased for each record */ - bool files_initiated; } lgs_ckpt_stream_open_t; typedef struct { @@ -111,7 +109,6 @@ typedef struct { char *logFileFormat; SaUint32T severityFilter; char *logFileCurrent; - bool files_initiated; } lgs_ckpt_stream_cfg_t; typedef struct { diff --git a/osaf/services/saf/logsv/lgs/lgs_stream.c b/osaf/services/saf/logsv/lgs/lgs_stream.c --- a/osaf/services/saf/logsv/lgs/lgs_stream.c +++ b/osaf/services/saf/logsv/lgs/lgs_stream.c @@ -48,7 +48,7 @@ static int lgs_stream_array_remove(int i static int get_number_of_log_files_h(log_stream_t *logStream, char *oldest_file); /** - * Open/Create a file + * Open/create a file for append in non blocking mode. * @param filepath[in] * @param errno_save[out], errno if error * @return File descriptor or -1 if error @@ -277,12 +277,7 @@ static uint32_t log_stream_remove(const } /** - * Initiate the files belonging to a stream if they are not already initiated. - * The stream->files_initiated flag is set when the files are initiated. - * If the files could not be initiated e.g. the file system is not working, the - * files_initiated flag will not be set. - * This function should be used when a stream is opened and every time a log - * record is written. + * Initiate the files belonging to a stream * * @param stream */ @@ -291,41 +286,35 @@ static void log_initiate_stream_files(lo int errno_save; TRACE_ENTER(); - TRACE_2("%s - stream->files_initiated = %d",__FUNCTION__,stream->files_initiated); - if (stream->files_initiated == false) { - - /* Delete to get counting right. It might not exist. */ - (void)delete_config_file(stream); + /* Initiate stream file descriptor to flag that no valid descriptor exist */ + stream->fd = -1; + + /* Delete to get counting right. It might not exist. */ + (void)delete_config_file(stream); - /* Save what filename that shall be the current log file */ - snprintf(stream->logFileCurrent, NAME_MAX, "%s_%s", stream->fileName, lgs_get_time()); + /* Remove files from a previous life if needed */ + if (rotate_if_needed(stream) == -1) { + TRACE("%s - rotate_if_needed() FAIL",__FUNCTION__); + goto done; + } - /* Remove files from a previous life if needed */ - if (rotate_if_needed(stream) == -1) { - TRACE("%s - rotate_if_needed() FAIL",__FUNCTION__); - goto done; - } + if (lgs_make_reldir_h(stream->pathName) != 0){ + TRACE("%s - lgs_make_dir_h() FAIL",__FUNCTION__); + goto done; + } - if (lgs_make_reldir_h(stream->pathName) != 0){ - TRACE("%s - lgs_make_dir_h() FAIL",__FUNCTION__); - goto done; - } + if (lgs_create_config_file_h(stream) != 0) { + TRACE("%s - lgs_create_config_file_h() FAIL",__FUNCTION__); + goto done; + } - if (lgs_create_config_file_h(stream) != 0) { - TRACE("%s - lgs_create_config_file_h() FAIL",__FUNCTION__); - goto done; - } + if ((stream->fd = log_file_open(stream, &errno_save)) == -1) { + TRACE("%s - Could not open '%s' - %s",__FUNCTION__, + stream->logFileCurrent, strerror(errno_save)); + goto done; + } - if ((stream->fd = log_file_open(stream, &errno_save)) == -1) { - TRACE("%s - Could not open '%s' - %s",__FUNCTION__, - stream->logFileCurrent, strerror(errno_save)); - goto done; - } - - stream->files_initiated = true; - } - done: TRACE_LEAVE(); } @@ -685,7 +674,8 @@ log_stream_t *log_stream_new_2(SaNameT * } /** - * Open file associated with stream + * Open log file associated with stream + * The file is opened in non blocking mode. * @param stream * @param errno_save - errno from open() returned here if supplied * @@ -728,12 +718,9 @@ SaAisErrorT log_stream_open(log_stream_t /* first time open? */ if (stream->numOpeners == 0) { + /* Create and save current log file name */ + snprintf(stream->logFileCurrent, NAME_MAX, "%s_%s", stream->fileName, lgs_get_time()); log_initiate_stream_files(stream); - } else { - /* Second or more open on a stream */ - if (stream->fd == -1) { - log_initiate_stream_files(stream); - } } /* Opening a stream will always succeed. If file system problem a new @@ -893,7 +880,9 @@ done: * @param buf * @param count * - * @return int -1 on error, 0 otherwise + * @return int 0 No error + * -1 on error + * -2 Write failed because of write timeout or EWOULDBLOCK/EAGAIN */ int log_stream_write_h(log_stream_t *stream, const char *buf, size_t count) { @@ -912,26 +901,17 @@ int log_stream_write_h(log_stream_t *str /* Open files on demand e.g. on new active after fail/switch-over. This * enables LOG to cope with temporary file system problems. */ if (stream->fd == -1) { - /* Creating directory of given path to store log and cfg files, - * if not using shared file system. */ - if (lgs_make_reldir_h(stream->pathName) != 0) { - LOG_NO("Create directory '%s/%s' failed", lgs_cb->logsv_root_dir, stream->pathName); - rc = -1; - goto done; + /* Create directory and log files if they were not created at + * stream open or reopen files if bad file descriptor. + */ + log_initiate_stream_files(stream); + + if (stream->fd == -1) { + TRACE("%s - Initiating stream files \"%s\" Failed", __FUNCTION__, + stream->name); + } else { + TRACE("%s - stream files initiated", __FUNCTION__); } - /* Creating config file on new ACTIVE */ - if (lgs_create_config_file_h(stream) != 0) { - TRACE("Creating config file failed"); - rc = -1; - goto done; - } - TRACE("stream: %s not opened, opening it now", stream->name); - stream->fd = log_file_open(stream, NULL); - if (stream->fd == -1) { - rc = -1; - goto done; - } - TRACE("stream %s now opened", stream->name); } TRACE("%s - stream->fd = %d",__FUNCTION__,stream->fd); @@ -957,7 +937,10 @@ int log_stream_write_h(log_stream_t *str apipar.data_out = (void *) &write_errno; api_rc = log_file_api(&apipar); - if (api_rc != LGSF_SUCESS) { + if (api_rc == LGSF_TIMEOUT) { + TRACE("%s - API error %s",__FUNCTION__,lgsf_retcode_str(api_rc)); + rc = -2; + } else if (api_rc != LGSF_SUCESS) { TRACE("%s - API error %s",__FUNCTION__,lgsf_retcode_str(api_rc)); rc = -1; } else { @@ -967,20 +950,29 @@ int log_stream_write_h(log_stream_t *str free(params_in); /* End write the log record */ - if (rc == -1) { - /* If writing failed because of invalid file descriptor then invalidate - * the stream file descriptor. + if ((rc == -1) || (rc == -2)) { + /* If writing failed always invalidate the stream file descriptor. */ /* Careful with log level here to avoid syslog flooding */ LOG_IN("write '%s' failed \"%s\"", stream->logFileCurrent, strerror(write_errno)); - if (write_errno == EBADF) { + if (stream->fd != -1) { + /* Try to close the file and invalidate the stream fd */ + fileclose_h(stream->fd); stream->fd = -1; } + + if ((write_errno == EAGAIN) || (write_errno == EWOULDBLOCK)) { + /* Change return code to timeout if EAGAIN (would block) */ + TRACE("Write would block"); + rc = -2; + } goto done; } + /* Handle file size and rotate if needed + */ rc = 0; stream->curFileSize += count; diff --git a/osaf/services/saf/logsv/lgs/lgs_stream.h b/osaf/services/saf/logsv/lgs/lgs_stream.h --- a/osaf/services/saf/logsv/lgs/lgs_stream.h +++ b/osaf/services/saf/logsv/lgs/lgs_stream.h @@ -47,7 +47,6 @@ typedef struct log_stream { SaUint64T filtered; /* discarded by server due to filtering */ /* --- end correspond to IMM Class --- */ - bool files_initiated; /* True when all files belonging to stream are initiated */ uint32_t streamId; /* The unique stream id for this stream */ int32_t fd; /* The stream file descriptor */ char logFileCurrent[NAME_MAX]; /* Current file name */ ------------------------------------------------------------------------------ Get 100% visibility into Java/.NET code with AppDynamics Lite! It's a free troubleshooting tool designed for production. Get down to code-level detail for bottlenecks, with <2% overhead. Download for free and get started troubleshooting in minutes. http://pubads.g.doubleclick.net/gampad/clk?id=48897031&iu=/4140/ostg.clktrk _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel