This is a first cut of a modification to imfile to let it read multi-line files.

As-is, this should have no effect on a system, as it hard-codes the mode to reading single lines. (I really don't understand how to set a config variable, but for someone who does, it should be simple to replace the '0' in imfile.c with the value read from the config file.)

With this config option change, it should be possible to read logfiles that have blank lines between multi-line log entries and have those log entries treated as a single line.

I also have code in place (but disabled) to try to deal with the more complicated layout where all lines after the first one are indented if they are part of the same log entry. The problem I have is that when I discover that I have finished reading a log entry, I have already read the first character of the next log entry. This extra character needs to be put back into the input buffer, but I don't know whether that is possible. If it isn't possible, I need a function that will let me peek at the next character in the input buffer and make my decision based on that.

This compiles, but I have not tested it anywhere yet. With the hard-coded mode 0 (LF termination), there should be no change other than an extra test against a constant for each character read from a file.

David Lang
diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c
index 8a10e26..72d6e69 100644
--- a/plugins/imfile/imfile.c
+++ b/plugins/imfile/imfile.c
@@ -206,7 +206,7 @@ static rsRetVal pollFile(fileInfo_t *pThis, int *pbHadFileData)
 
 	/* loop below will be exited when strmReadLine() returns EOF */
 	while(1) {
-		CHKiRet(strm.ReadLine(pThis->pStrm, &pCStr));
+		CHKiRet(strm.ReadMultiLine(pThis->pStrm, &pCStr, 0));
 		*pbHadFileData = 1; /* this is just a flag, so set it and forget it */
 		CHKiRet(enqLine(pThis, pCStr)); /* process line */
 		rsCStrDestruct(&pCStr); /* discard string (must be done by us!) */
diff --git a/runtime/stream.c b/runtime/stream.c
index b429576..9c7880c 100644
--- a/runtime/stream.c
+++ b/runtime/stream.c
@@ -554,6 +554,78 @@ static rsRetVal strmUnreadChar(strm_t *pThis, uchar c)
 	return RS_RET_OK;
 }
 
+/* Read one log entry (a 'paragraph') from a strm file into a newly constructed CStr. This was modified from ReadLine.
+ * Depending on the mode, an entry is meant to end at a LF, at a LFLF (blank line), or at LF<not whitespace>.
+ * The terminating LF characters are read, but are meant to be
+ * discarded rather than returned in the buffer. On RS_RET_OK the caller is responsible for
+ * destruction of the returned CStr object; on any other result it is destructed at finalize_it. -- dlang 2010-12-13
+ */
+static rsRetVal
+strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, uchar mode)
+{
+	/* mode = 0 single line mode (equivalent to ReadLine)
+         * mode = 1 LFLF mode (paragraph, blank line between entries)
+         * mode = 2 LF <not whitespace> mode: an entry starts at the beginning of a line, and following indented lines belong to the same entry (the code below tests mode == 2, and that test is currently disabled)
+         */
+        DEFiRet;
+        uchar c;
+
+        ASSERT(pThis != NULL);
+        ASSERT(ppCStr != NULL);
+
+        CHKiRet(cstrConstruct(ppCStr));
+
+        /* read the first character, then handle it according to mode */
+        CHKiRet(strmReadChar(pThis, &c));
+        if (mode == 0){
+        	while(c != '\n') {
+                	CHKiRet(cstrAppendChar(*ppCStr, c));
+                	CHKiRet(strmReadChar(pThis, &c));
+        	}
+        	CHKiRet(cstrFinalize(*ppCStr));
+	}
+        if (mode == 1){ /* NOTE(review): a separate if, not else-if, so mode 0 also runs the else branch below after finalizing -- looks unintended, confirm */
+        	while(c != '\n') {
+                	CHKiRet(cstrAppendChar(*ppCStr, c));
+                	CHKiRet(strmReadChar(pThis, &c));
+        	} 
+	} else {
+		if ((*ppCStr)->iStrLen > 0 && *((*ppCStr)->pBuf - 1) == '\n'){ /* NOTE(review): this reads the byte before pBuf; presumably pBuf[iStrLen - 1] was intended -- confirm */
+			rsCStrTruncate(*ppCStr,1); /* remove the prior newline */
+        		CHKiRet(cstrFinalize(*ppCStr));
+		} else {
+               		CHKiRet(cstrAppendChar(*ppCStr, c));
+               		CHKiRet(strmReadChar(pThis, &c));
+		}
+	}
+        if (mode == 2 && 1 == 2){ /* "1 == 2" is always false: the mode 2 loop is disabled and the else branch below runs for every mode */
+        	while(c != '\n' && c != ' ' && c != '\t') {
+                	CHKiRet(cstrAppendChar(*ppCStr, c));
+                	CHKiRet(strmReadChar(pThis, &c));
+        	}
+	}else {
+		if ((*ppCStr)->iStrLen > 0 && *((*ppCStr)->pBuf - 1) == '\n'){
+			if (c != ' ' && c != '\t'){
+				rsCStrTruncate(*ppCStr,1); /* remove the prior newline */
+       				CHKiRet(cstrFinalize(*ppCStr));
+/* TODO: the character just read belongs to the next entry and is dropped here; it must become part of the next string or be put back. NOTE(review): strmUnreadChar(), defined just above this function in stream.c, looks like the way to put it back -- confirm */
+			} else {
+               			CHKiRet(cstrAppendChar(*ppCStr, c));
+               			CHKiRet(strmReadChar(pThis, &c));
+			}
+		} else {
+               		CHKiRet(cstrAppendChar(*ppCStr, c));
+               		CHKiRet(strmReadChar(pThis, &c));
+		}
+	}
+
+finalize_it:
+        if(iRet != RS_RET_OK && *ppCStr != NULL)
+                cstrDestruct(ppCStr);
+
+        RETiRet;
+}
+
 
 /* read a line from a strm file. A line is terminated by LF. The LF is read, but it
  * is not returned in the buffer (it is discared). The caller is responsible for
diff --git a/runtime/stream.h b/runtime/stream.h
index 37e9d57..989e9b6 100644
--- a/runtime/stream.h
+++ b/runtime/stream.h
@@ -156,6 +156,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */
 	rsRetVal (*SetFileName)(strm_t *pThis, uchar *pszName, size_t iLenName);
 	rsRetVal (*ReadChar)(strm_t *pThis, uchar *pC);
 	rsRetVal (*UnreadChar)(strm_t *pThis, uchar c);
+	rsRetVal (*ReadMultiLine)(strm_t *pThis, cstr_t **ppCStr, uchar mode);
 	rsRetVal (*ReadLine)(strm_t *pThis, cstr_t **ppCStr);
 	rsRetVal (*SeekCurrOffs)(strm_t *pThis);
 	rsRetVal (*Write)(strm_t *pThis, uchar *pBuf, size_t lenBuf);
_______________________________________________
rsyslog mailing list
http://lists.adiscon.net/mailman/listinfo/rsyslog
http://www.rsyslog.com

Reply via email to