syslogd file system full

2017-10-05 Thread Alexander Bluhm
Hi,

When /var/log/ is full, syslogd(8) stops writing to the files there.
It does this permanently, so cleaning /var without sending SIGHUP to
syslogd does not help.  It is better to retry, write an error message
to the other log hosts, and write a summary after it works again.

syslogd[52967]: write to file "/mnt/regress-syslogd/file.log": No space left on device
syslogd[52967]: dropped 36 messages to file "/mnt/regress-syslogd/file.log"

I am not sure whether we should also retry logging to a file if
errno is EIO.  Can it ever recover?

ok?

bluhm

Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.252
diff -u -p -r1.252 syslogd.c
--- usr.sbin/syslogd/syslogd.c  5 Oct 2017 16:34:59 -   1.252
+++ usr.sbin/syslogd/syslogd.c  5 Oct 2017 16:39:42 -
@@ -158,7 +158,6 @@ struct filed {
struct tls  *f_ctx;
char*f_host;
int  f_reconnectwait;
-   int  f_dropped;
} f_forw;   /* forwarding address */
charf_fname[PATH_MAX];
struct {
@@ -177,6 +176,7 @@ struct filed {
int f_prevcount;/* repetition cnt of prevline */
unsigned int f_repeatcount; /* number of "repeated" msgs */
int f_quick;/* abort when matched */
+   int f_dropped;  /* warn, dropped message */
time_t  f_lasterrtime;  /* last error was reported */
 };
 
@@ -242,6 +242,7 @@ const char *ClientCertfile = NULL;
 const char *ClientKeyfile = NULL;
 const char *ServerCAfile = NULL;
 inttcpbuf_dropped = 0; /* count messages dropped from TCP or TLS */
+intfile_dropped = 0;   /* messages dropped due to file system full */
 intinit_dropped = 0;   /* messages dropped during initialization */
 
 #define CTL_READING_CMD1
@@ -1388,11 +1389,11 @@ tcp_writecb(struct bufferevent *bufev, v
log_debug("loghost \"%s\" successful write", f->f_un.f_forw.f_loghost);
f->f_un.f_forw.f_reconnectwait = 0;
 
-   if (f->f_un.f_forw.f_dropped > 0 &&
+   if (f->f_dropped > 0 &&
EVBUFFER_LENGTH(f->f_un.f_forw.f_bufev->output) < MAX_TCPBUF) {
snprintf(ebuf, sizeof(ebuf), "to loghost \"%s\"",
f->f_un.f_forw.f_loghost);
-   dropped_warn(&f->f_un.f_forw.f_dropped, ebuf);
+   dropped_warn(&f->f_dropped, ebuf);
}
 }
 
@@ -1443,7 +1444,7 @@ tcp_errorcb(struct bufferevent *bufev, s
evbuffer_drain(bufev->output, -1);
log_debug("loghost \"%s\" dropped partial message",
f->f_un.f_forw.f_loghost);
-   f->f_un.f_forw.f_dropped++;
+   f->f_dropped++;
}
 
tcp_connect_retry(bufev, f);
@@ -1894,6 +1895,7 @@ fprintlog(struct filed *f, int flags, ch
struct iovec *v;
int l, retryonce;
char line[LOG_MAXLINE + 1], repbuf[80], greetings[500];
+   char ebuf[ERRBUFSIZE];
 
v = iov;
if (f->f_type == F_WALL) {
@@ -2000,7 +2002,7 @@ fprintlog(struct filed *f, int flags, ch
if (EVBUFFER_LENGTH(f->f_un.f_forw.f_bufev->output) >=
MAX_TCPBUF) {
log_debug(" (dropped)");
-   f->f_un.f_forw.f_dropped++;
+   f->f_dropped++;
break;
}
/*
@@ -2016,7 +2018,7 @@ fprintlog(struct filed *f, int flags, ch
IncludeHostname ? " " : "");
if (l < 0) {
log_debug(" (dropped snprintf)");
-   f->f_un.f_forw.f_dropped++;
+   f->f_dropped++;
break;
}
l = evbuffer_add_printf(f->f_un.f_forw.f_bufev->output,
@@ -2028,7 +2030,7 @@ fprintlog(struct filed *f, int flags, ch
(char *)iov[4].iov_base);
if (l < 0) {
log_debug(" (dropped evbuffer_add_printf)");
-   f->f_un.f_forw.f_dropped++;
+   f->f_dropped++;
break;
}
bufferevent_enable(f->f_un.f_forw.f_bufev, EV_WRITE);
@@ -2058,11 +2060,23 @@ fprintlog(struct filed *f, int flags, ch
if (writev(f->f_file, iov, 6) < 0) {
int e = errno;
 
+   /* allow to recover from file system full */
+   if ((e == EIO || e == ENOSPC) && f->f_type == F_FILE) {
+   if (f->f_dropped++ == 0) {
+   f->f_type = F_UNUSED;
+  

Re: syslogd file system full

2017-10-23 Thread Alexander Bluhm
Anyone?

On Thu, Oct 05, 2017 at 07:19:29PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> When /var/log/ is full, syslogd(8) stops writing to the files there.
> It does this permanently, so cleaning /var without sending SIGHUP to
> syslogd does not help.  It is better to retry, write an error message
> to the other log hosts, and write a summary after it works again.
> 
> syslogd[52967]: write to file "/mnt/regress-syslogd/file.log": No space left on device
> syslogd[52967]: dropped 36 messages to file "/mnt/regress-syslogd/file.log"
> 
> I am not sure whether we should also retry logging to a file if
> errno is EIO.  Can it ever recover?
> 
> ok?
> 
> bluhm
> 
> Index: usr.sbin/syslogd/syslogd.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
> retrieving revision 1.252
> diff -u -p -r1.252 syslogd.c
> --- usr.sbin/syslogd/syslogd.c5 Oct 2017 16:34:59 -   1.252
> +++ usr.sbin/syslogd/syslogd.c5 Oct 2017 16:39:42 -
> @@ -158,7 +158,6 @@ struct filed {
>   struct tls  *f_ctx;
>   char*f_host;
>   int  f_reconnectwait;
> - int  f_dropped;
>   } f_forw;   /* forwarding address */
>   charf_fname[PATH_MAX];
>   struct {
> @@ -177,6 +176,7 @@ struct filed {
>   int f_prevcount;/* repetition cnt of prevline */
>   unsigned int f_repeatcount; /* number of "repeated" msgs */
>   int f_quick;/* abort when matched */
> + int f_dropped;  /* warn, dropped message */
>   time_t  f_lasterrtime;  /* last error was reported */
>  };
>  
> @@ -242,6 +242,7 @@ const char *ClientCertfile = NULL;
>  const char *ClientKeyfile = NULL;
>  const char *ServerCAfile = NULL;
>  int  tcpbuf_dropped = 0; /* count messages dropped from TCP or TLS */
> +int  file_dropped = 0;   /* messages dropped due to file system full */
>  int  init_dropped = 0;   /* messages dropped during initialization */
>  
>  #define CTL_READING_CMD  1
> @@ -1388,11 +1389,11 @@ tcp_writecb(struct bufferevent *bufev, v
>   log_debug("loghost \"%s\" successful write", f->f_un.f_forw.f_loghost);
>   f->f_un.f_forw.f_reconnectwait = 0;
>  
> - if (f->f_un.f_forw.f_dropped > 0 &&
> + if (f->f_dropped > 0 &&
>   EVBUFFER_LENGTH(f->f_un.f_forw.f_bufev->output) < MAX_TCPBUF) {
>   snprintf(ebuf, sizeof(ebuf), "to loghost \"%s\"",
>   f->f_un.f_forw.f_loghost);
> - dropped_warn(&f->f_un.f_forw.f_dropped, ebuf);
> + dropped_warn(&f->f_dropped, ebuf);
>   }
>  }
>  
> @@ -1443,7 +1444,7 @@ tcp_errorcb(struct bufferevent *bufev, s
>   evbuffer_drain(bufev->output, -1);
>   log_debug("loghost \"%s\" dropped partial message",
>   f->f_un.f_forw.f_loghost);
> - f->f_un.f_forw.f_dropped++;
> + f->f_dropped++;
>   }
>  
>   tcp_connect_retry(bufev, f);
> @@ -1894,6 +1895,7 @@ fprintlog(struct filed *f, int flags, ch
>   struct iovec *v;
>   int l, retryonce;
>   char line[LOG_MAXLINE + 1], repbuf[80], greetings[500];
> + char ebuf[ERRBUFSIZE];
>  
>   v = iov;
>   if (f->f_type == F_WALL) {
> @@ -2000,7 +2002,7 @@ fprintlog(struct filed *f, int flags, ch
>   if (EVBUFFER_LENGTH(f->f_un.f_forw.f_bufev->output) >=
>   MAX_TCPBUF) {
>   log_debug(" (dropped)");
> - f->f_un.f_forw.f_dropped++;
> + f->f_dropped++;
>   break;
>   }
>   /*
> @@ -2016,7 +2018,7 @@ fprintlog(struct filed *f, int flags, ch
>   IncludeHostname ? " " : "");
>   if (l < 0) {
>   log_debug(" (dropped snprintf)");
> - f->f_un.f_forw.f_dropped++;
> + f->f_dropped++;
>   break;
>   }
>   l = evbuffer_add_printf(f->f_un.f_forw.f_bufev->output,
> @@ -2028,7 +2030,7 @@ fprintlog(struct filed *f, int flags, ch
>   (char *)iov[4].iov_base);
>   if (l < 0) {
>   log_debug(" (dropped evbuffer_add_printf)");
> - f->f_un.f_forw.f_dropped++;
> + f->f_dropped++;
>   break;
>   }
>   bufferevent_enable(f->f_un.f_forw.f_bufev, EV_WRITE);
> @@ -2058,11 +2060,23 @@ fprintlog(struct filed *f, int flags, ch
>   if (writev(f->f_file, iov, 6) < 0) {
>   int e = errno;
>  
> + /* allow to recover from file system full */
> + if ((e == EIO || e == ENOSPC)