On Thu, Apr 09, 2015 at 05:20:43PM +0200, Michal Sekletar wrote:
> On Thu, Apr 09, 2015 at 02:44:38PM +0000, Zbigniew Jędrzejewski-Szmek wrote:
> > On Thu, Apr 09, 2015 at 04:35:53PM +0200, Michal Sekletar wrote:
> > > On Thu, Apr 09, 2015 at 02:10:14PM +0000, Zbigniew Jędrzejewski-Szmek 
> > > wrote:
> > > > On Thu, Apr 09, 2015 at 03:20:02PM +0200, Michal Sekletar wrote:
> > > > > Users might have hard time figuring out why exactly their systemctl 
> > > > > request
> > > > > failed. If dbus job fails try to figure out more details about 
> > > > > failure by
> > > > > examining Result property of the service.
> > > > > 
> > > > > https://bugzilla.redhat.com/show_bug.cgi?id=1016680
> > > > > ---
> > > > >  src/libsystemd/sd-bus/bus-util.c | 41 +++++++++++++++++++---
> > > > >  src/shared/log.c                 | 76 
> > > > > ++++++++++++++++++++++++++++++++++++++++
> > > > >  src/shared/log.h                 |  2 ++
> > > > >  3 files changed, 114 insertions(+), 5 deletions(-)
> > > > > 
> > > > > diff --git a/src/libsystemd/sd-bus/bus-util.c 
> > > > > b/src/libsystemd/sd-bus/bus-util.c
> > > > > index 6498769..bb4c993 100644
> > > > > --- a/src/libsystemd/sd-bus/bus-util.c
> > > > > +++ b/src/libsystemd/sd-bus/bus-util.c
> > > > > @@ -30,6 +30,7 @@
> > > > >  #include "path-util.h"
> > > > >  #include "missing.h"
> > > > >  #include "set.h"
> > > > > +#include "unit-name.h"
> > > > >  
> > > > >  #include "sd-bus.h"
> > > > >  #include "bus-error.h"
> > > > > @@ -1716,6 +1717,35 @@ static int bus_process_wait(sd_bus *bus) {
> > > > >          }
> > > > >  }
> > > > >  
> > > > > +static int bus_job_get_service_result(BusWaitForJobs *d, char 
> > > > > **result) {
> > > > > +        int r = 0;
> > > > > +        _cleanup_free_ char *dbus_path = NULL, *p = NULL;
> > > > > +
> > > > > +        assert(d);
> > > > > +        assert(d->result);
> > > > > +        assert(d->name);
> > > > > +        assert(result);
> > > > > +
> > > > > +        dbus_path = unit_dbus_path_from_name(d->name);
> > > > > +        if (!dbus_path)
> > > > > +                return -ENOMEM;
> > > > > +
> > > > > +        r = sd_bus_get_property_string(d->bus,
> > > > > +                                       "org.freedesktop.systemd1",
> > > > > +                                       dbus_path,
> > > > > +                                       
> > > > > "org.freedesktop.systemd1.Service",
> > > > > +                                       "Result",
> > > > > +                                       NULL,
> > > > > +                                       &p);
> > > > > +        if (r < 0)
> > > > > +                return r;
> > > > > +
> > > > > +        *result = p;
> > > > > +        p = NULL;
> > > > > +
> > > > > +        return 0;
> > > > > +}
> > > > > +
> > > > >  static int check_wait_response(BusWaitForJobs *d, bool quiet) {
> > > > >          int r = 0;
> > > > >  
> > > > > @@ -1736,13 +1766,14 @@ static int check_wait_response(BusWaitForJobs 
> > > > > *d, bool quiet) {
> > > > >                          log_error("Operation on or unit type of %s 
> > > > > not supported on this system.", strna(d->name));
> > > > >                  else if (!streq(d->result, "done") && 
> > > > > !streq(d->result, "skipped")) {
> > > > >                          if (d->name) {
> > > > > -                                bool quotes;
> > > > > +                                int q;
> > > > > +                                _cleanup_free_ char *result = NULL;
> > > > >  
> > > > > -                                quotes = chars_intersect(d->name, 
> > > > > SHELL_NEED_QUOTES);
> > > > > +                                q = bus_job_get_service_result(d, 
> > > > > &result);
> > > > > +                                if (q < 0)
> > > > > +                                        log_debug_errno(q, "Failed 
> > > > > to get Result property of service %s: %m", d->name);
> > > > >  
> > > > > -                                log_error("Job for %s failed. See 
> > > > > \"systemctl status %s%s%s\" and \"journalctl -xe\" for details.",
> > > > > -                                          d->name,
> > > > > -                                          quotes ? "'" : "", 
> > > > > d->name, quotes ? "'" : "");
> > > > > +                                
> > > > > log_job_error_with_service_result(d->name, result);
> > > > >                          } else
> > > > >                                  log_error("Job failed. See 
> > > > > \"journalctl -xe\" for details.");
> > > > >                  }
> > > > > diff --git a/src/shared/log.c b/src/shared/log.c
> > > > > index 646a1d6..3219756 100644
> > > > > --- a/src/shared/log.c
> > > > > +++ b/src/shared/log.c
> > > > > @@ -1061,3 +1061,79 @@ void log_received_signal(int level, const 
> > > > > struct signalfd_siginfo *si) {
> > > > >  void log_set_upgrade_syslog_to_journal(bool b) {
> > > > >          upgrade_syslog_to_journal = b;
> > > > >  }
> > > > > +
> > > > > +typedef enum ServiceResult {
> > > > > +        SERVICE_SUCCESS,
> > > > > +        SERVICE_FAILURE_RESOURCES,
> > > > > +        SERVICE_FAILURE_TIMEOUT,
> > > > > +        SERVICE_FAILURE_EXIT_CODE,
> > > > > +        SERVICE_FAILURE_SIGNAL,
> > > > > +        SERVICE_FAILURE_CORE_DUMP,
> > > > > +        SERVICE_FAILURE_WATCHDOG,
> > > > > +        SERVICE_FAILURE_START_LIMIT,
> > > > > +        _SERVICE_RESULT_MAX,
> > > > > +        _SERVICE_RESULT_INVALID = -1
> > > > > +} ServiceResult;
> > > > 
> > > > dbus property returns the result as a string. So we need to map this 
> > > > string
> > > > to an explanation. Going through the intermediate ServiceResult int does
> > > > not really gain anything... I think things would be simpler if you 
> > > > added a
> > > > table mapping result to explanation:
> > > > 
> > > > static const struct {
> > > >   const char *result, *explanation;
> > > > } explanations[] = {
> > > >   {"timeout", "start timeout was exceeded"},
> > > >   ...
> > > >   {}
> > > > };
> > > > 
> > > > and then simply looped over this table. This way it won't be necessary 
> > > > to
> > > > have three different lists.
> > > 
> > > I guess that what you are suggesting will remove a bit of boilerplate 
> > > code, will
> > > do that.
> > > 
> > > > 
> > > > This could live simply in systemctl.c. Those are only parts of
> > > > sentences, nothing generally useful.
> > > 
> > > I'd say that having general function for logging about failed dbus job 
> > > might be
> > > useful.
> > > 
> > > > 
> > > > > +static const char* const service_result_table[_SERVICE_RESULT_MAX] = 
> > > > > {
> > > > > +        [SERVICE_SUCCESS] = "success",
> > > > > +        [SERVICE_FAILURE_RESOURCES] = "resources",
> > > > > +        [SERVICE_FAILURE_TIMEOUT] = "timeout",
> > > > > +        [SERVICE_FAILURE_EXIT_CODE] = "exit-code",
> > > > > +        [SERVICE_FAILURE_SIGNAL] = "signal",
> > > > > +        [SERVICE_FAILURE_CORE_DUMP] = "core-dump",
> > > > > +        [SERVICE_FAILURE_WATCHDOG] = "watchdog",
> > > > > +        [SERVICE_FAILURE_START_LIMIT] = "start-limit"
> > > > > +};
> > > > > +
> > > > > +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(service_result, 
> > > > > ServiceResult);
> > > > > +
> > > > > +static const char* const 
> > > > > service_result_error_message_table[_SERVICE_RESULT_MAX] = {
> > > > > +        [SERVICE_FAILURE_RESOURCES] = "configured resource limit was 
> > > > > exceeded",
> > > > > +        [SERVICE_FAILURE_TIMEOUT] = ,
> > > > > +        [SERVICE_FAILURE_EXIT_CODE] = "ExecStart process exited with 
> > > > > error code",
> > > > > +        [SERVICE_FAILURE_SIGNAL] = "fatal signal was delivered to 
> > > > > ExecStart process",
> > > > > +        [SERVICE_FAILURE_CORE_DUMP] = "fatal signal was delivered to 
> > > > > ExecStart process. Core dumped",
> > > > 
> > > > "fatal signal was delivered to ExecStart process and it dumped core"?
> > > > 
> > > > > +        [SERVICE_FAILURE_WATCHDOG] = "service failed to sent 
> > > > > watchdog ping",
> > > > "...failed to send..."
> > > > 
> > > > > +        [SERVICE_FAILURE_START_LIMIT] = "start of the service was 
> > > > > attempted too often too quickly",
> > > > > +};
> > > > > +
> > > > > +void log_job_error_with_service_result(const char* service, const 
> > > > > char *result) {
> > > > > +        bool quotes;
> > > > > +        _cleanup_free_ char *stream_buffer = NULL;
> > > > > +        _cleanup_fclose_ FILE *stream = NULL;
> > > > > +        char *service_maybe_quoted = NULL;
> > > > > +        size_t size;
> > > > > +        ServiceResult r;
> > > > > +
> > > > > +        assert(service);
> > > > > +        assert(result);
> > > > > +
> > > > > +        quotes = chars_intersect(service, SHELL_NEED_QUOTES);
> > > > > +
> > > > > +        service_maybe_quoted = alloca(strlen(service) + 3);
> > > > > +        sprintf(service_maybe_quoted, "%s%s%s", quotes ? "'" : "", 
> > > > > service, quotes ? "'" : "");
> > > > > +
> > > > > +        stream = open_memstream(&stream_buffer, &size);
> > > > > +        if (!stream)
> > > > > +                return;
> > > > > +
> > > > > +        r = service_result_from_string(result);
> > > > > +
> > > > > +        assert(r > _SERVICE_RESULT_INVALID && r < 
> > > > > _SERVICE_RESULT_MAX);
> > > > > +
> > > > > +        fprintf(stream, "Job for %s failed because %s. See 
> > > > > \"systemctl status %s\" and \"journalctl -xe\" for details.\n",
> > > > > +                service,
> > > > > +                service_result_error_message_table[r],
> > > > > +                service_maybe_quoted);
> > > > > +
> > > > > +        /* For some results maybe additional explanation is required 
> > > > > */
> > > > > +        if (streq_ptr(result, "start-limit"))
> > > > > +                fprintf(stream, "To force a start please invoke 
> > > > > \"systemctl reset-failed %s\" followed by \"systemctl start %s\" 
> > > > > again.",
> > > > > +                        service_maybe_quoted,
> > > > > +                        service_maybe_quoted);
> > > > > +
> > > > > +        fflush(stream);
> > > > > +
> > > > > +        log_error("%s", stream_buffer);
> > > > 
> > > > I don't understand why memstream is needed. Why not just do log_error(),
> > > > and optionally follow that by log_info() with the additional hint?
> > > 
> > > I used memstream because I did not want log twice in case that function is
> > > called from context where log target is not tty, i.e. not from systemctl.
> > I don't think this matters. Logging in two steps vs one matters when
> > doing structured logging, e.g. to the journal. But when writing to a file,
> > no matter if a tty or something else, doing it with one or two calls is 
> > pretty
> > much the same thing.
> 
> That is exactly my point. I wanted to allow people to call the function from
> whatever context they want. I didn't assume *anything* about log target and
> try to do the right thing regardless what it is.

But this is not a library function that can be called from upredictable 
contexts.
It is our internal utility, so we can tailor it to our current needs. If it ever
needs to be made more generic, we can do it when the need arises.

Zbyszek
_______________________________________________
systemd-devel mailing list
systemd-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/systemd-devel

Reply via email to