On Thu, Apr 09, 2015 at 05:30:44PM +0200, Michal Sekletar wrote:
> On Thu, Apr 09, 2015 at 03:24:54PM +0000, Zbigniew Jędrzejewski-Szmek wrote:
> > On Thu, Apr 09, 2015 at 05:20:43PM +0200, Michal Sekletar wrote:
> > > On Thu, Apr 09, 2015 at 02:44:38PM +0000, Zbigniew Jędrzejewski-Szmek 
> > > wrote:
> > > > On Thu, Apr 09, 2015 at 04:35:53PM +0200, Michal Sekletar wrote:
> > > > > On Thu, Apr 09, 2015 at 02:10:14PM +0000, Zbigniew Jędrzejewski-Szmek 
> > > > > wrote:
> > > > > > On Thu, Apr 09, 2015 at 03:20:02PM +0200, Michal Sekletar wrote:
> > > > > > > Users might have hard time figuring out why exactly their 
> > > > > > > systemctl request
> > > > > > > failed. If dbus job fails try to figure out more details about 
> > > > > > > failure by
> > > > > > > examining Result property of the service.
> > > > > > > 
> > > > > > > https://bugzilla.redhat.com/show_bug.cgi?id=1016680
> > > > > > > ---
> > > > > > >  src/libsystemd/sd-bus/bus-util.c | 41 +++++++++++++++++++---
> > > > > > >  src/shared/log.c                 | 76 
> > > > > > > ++++++++++++++++++++++++++++++++++++++++
> > > > > > >  src/shared/log.h                 |  2 ++
> > > > > > >  3 files changed, 114 insertions(+), 5 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/src/libsystemd/sd-bus/bus-util.c 
> > > > > > > b/src/libsystemd/sd-bus/bus-util.c
> > > > > > > index 6498769..bb4c993 100644
> > > > > > > --- a/src/libsystemd/sd-bus/bus-util.c
> > > > > > > +++ b/src/libsystemd/sd-bus/bus-util.c
> > > > > > > @@ -30,6 +30,7 @@
> > > > > > >  #include "path-util.h"
> > > > > > >  #include "missing.h"
> > > > > > >  #include "set.h"
> > > > > > > +#include "unit-name.h"
> > > > > > >  
> > > > > > >  #include "sd-bus.h"
> > > > > > >  #include "bus-error.h"
> > > > > > > @@ -1716,6 +1717,35 @@ static int bus_process_wait(sd_bus *bus) {
> > > > > > >          }
> > > > > > >  }
> > > > > > >  
> > > > > > > +static int bus_job_get_service_result(BusWaitForJobs *d, char 
> > > > > > > **result) {
> > > > > > > +        int r = 0;
> > > > > > > +        _cleanup_free_ char *dbus_path = NULL, *p = NULL;
> > > > > > > +
> > > > > > > +        assert(d);
> > > > > > > +        assert(d->result);
> > > > > > > +        assert(d->name);
> > > > > > > +        assert(result);
> > > > > > > +
> > > > > > > +        dbus_path = unit_dbus_path_from_name(d->name);
> > > > > > > +        if (!dbus_path)
> > > > > > > +                return -ENOMEM;
> > > > > > > +
> > > > > > > +        r = sd_bus_get_property_string(d->bus,
> > > > > > > +                                       
> > > > > > > "org.freedesktop.systemd1",
> > > > > > > +                                       dbus_path,
> > > > > > > +                                       
> > > > > > > "org.freedesktop.systemd1.Service",
> > > > > > > +                                       "Result",
> > > > > > > +                                       NULL,
> > > > > > > +                                       &p);
> > > > > > > +        if (r < 0)
> > > > > > > +                return r;
> > > > > > > +
> > > > > > > +        *result = p;
> > > > > > > +        p = NULL;
> > > > > > > +
> > > > > > > +        return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > >  static int check_wait_response(BusWaitForJobs *d, bool quiet) {
> > > > > > >          int r = 0;
> > > > > > >  
> > > > > > > @@ -1736,13 +1766,14 @@ static int 
> > > > > > > check_wait_response(BusWaitForJobs *d, bool quiet) {
> > > > > > >                          log_error("Operation on or unit type of 
> > > > > > > %s not supported on this system.", strna(d->name));
> > > > > > >                  else if (!streq(d->result, "done") && 
> > > > > > > !streq(d->result, "skipped")) {
> > > > > > >                          if (d->name) {
> > > > > > > -                                bool quotes;
> > > > > > > +                                int q;
> > > > > > > +                                _cleanup_free_ char *result = 
> > > > > > > NULL;
> > > > > > >  
> > > > > > > -                                quotes = 
> > > > > > > chars_intersect(d->name, SHELL_NEED_QUOTES);
> > > > > > > +                                q = 
> > > > > > > bus_job_get_service_result(d, &result);
> > > > > > > +                                if (q < 0)
> > > > > > > +                                        log_debug_errno(q, 
> > > > > > > "Failed to get Result property of service %s: %m", d->name);
> > > > > > >  
> > > > > > > -                                log_error("Job for %s failed. 
> > > > > > > See \"systemctl status %s%s%s\" and \"journalctl -xe\" for 
> > > > > > > details.",
> > > > > > > -                                          d->name,
> > > > > > > -                                          quotes ? "'" : "", 
> > > > > > > d->name, quotes ? "'" : "");
> > > > > > > +                                
> > > > > > > log_job_error_with_service_result(d->name, result);
> > > > > > >                          } else
> > > > > > >                                  log_error("Job failed. See 
> > > > > > > \"journalctl -xe\" for details.");
> > > > > > >                  }
> > > > > > > diff --git a/src/shared/log.c b/src/shared/log.c
> > > > > > > index 646a1d6..3219756 100644
> > > > > > > --- a/src/shared/log.c
> > > > > > > +++ b/src/shared/log.c
> > > > > > > @@ -1061,3 +1061,79 @@ void log_received_signal(int level, const 
> > > > > > > struct signalfd_siginfo *si) {
> > > > > > >  void log_set_upgrade_syslog_to_journal(bool b) {
> > > > > > >          upgrade_syslog_to_journal = b;
> > > > > > >  }
> > > > > > > +
> > > > > > > +typedef enum ServiceResult {
> > > > > > > +        SERVICE_SUCCESS,
> > > > > > > +        SERVICE_FAILURE_RESOURCES,
> > > > > > > +        SERVICE_FAILURE_TIMEOUT,
> > > > > > > +        SERVICE_FAILURE_EXIT_CODE,
> > > > > > > +        SERVICE_FAILURE_SIGNAL,
> > > > > > > +        SERVICE_FAILURE_CORE_DUMP,
> > > > > > > +        SERVICE_FAILURE_WATCHDOG,
> > > > > > > +        SERVICE_FAILURE_START_LIMIT,
> > > > > > > +        _SERVICE_RESULT_MAX,
> > > > > > > +        _SERVICE_RESULT_INVALID = -1
> > > > > > > +} ServiceResult;
> > > > > > 
> > > > > > dbus property returns the result as a string. So we need to map 
> > > > > > this string
> > > > > > to an explanation. Going through the intermediate ServiceResult int 
> > > > > > does
> > > > > > not really gain anything... I think things would be simpler if you 
> > > > > > added a
> > > > > > table mapping result to explanation:
> > > > > > 
> > > > > > static const struct {
> > > > > >   const char *result, *explanation;
> > > > > > } explanations[] = {
> > > > > >   {"timeout", "start timeout was exceeded"},
> > > > > >   ...
> > > > > >   {}
> > > > > > };
> > > > > > 
> > > > > > and then simply looped over this table. This way it won't be 
> > > > > > necessary to
> > > > > > have three different lists.
> > > > > 
> > > > > I guess that what you are suggesting will remove a bit of boilerplate 
> > > > > code, will
> > > > > do that.
> > > > > 
> > > > > > 
> > > > > > This could live simply in systemctl.c. Those are only parts of
> > > > > > sentences, nothing generally useful.
> > > > > 
> > > > > I'd say that having general function for logging about failed dbus 
> > > > > job might be
> > > > > useful.
> > > > > 
> > > > > > 
> > > > > > > +static const char* const 
> > > > > > > service_result_table[_SERVICE_RESULT_MAX] = {
> > > > > > > +        [SERVICE_SUCCESS] = "success",
> > > > > > > +        [SERVICE_FAILURE_RESOURCES] = "resources",
> > > > > > > +        [SERVICE_FAILURE_TIMEOUT] = "timeout",
> > > > > > > +        [SERVICE_FAILURE_EXIT_CODE] = "exit-code",
> > > > > > > +        [SERVICE_FAILURE_SIGNAL] = "signal",
> > > > > > > +        [SERVICE_FAILURE_CORE_DUMP] = "core-dump",
> > > > > > > +        [SERVICE_FAILURE_WATCHDOG] = "watchdog",
> > > > > > > +        [SERVICE_FAILURE_START_LIMIT] = "start-limit"
> > > > > > > +};
> > > > > > > +
> > > > > > > +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(service_result, 
> > > > > > > ServiceResult);
> > > > > > > +
> > > > > > > +static const char* const 
> > > > > > > service_result_error_message_table[_SERVICE_RESULT_MAX] = {
> > > > > > > +        [SERVICE_FAILURE_RESOURCES] = "configured resource limit 
> > > > > > > was exceeded",
> > > > > > > +        [SERVICE_FAILURE_TIMEOUT] = ,
> > > > > > > +        [SERVICE_FAILURE_EXIT_CODE] = "ExecStart process exited 
> > > > > > > with error code",
> > > > > > > +        [SERVICE_FAILURE_SIGNAL] = "fatal signal was delivered 
> > > > > > > to ExecStart process",
> > > > > > > +        [SERVICE_FAILURE_CORE_DUMP] = "fatal signal was 
> > > > > > > delivered to ExecStart process. Core dumped",
> > > > > > 
> > > > > > "fatal signal was delivered to ExecStart process and it dumped 
> > > > > > core"?
> > > > > > 
> > > > > > > +        [SERVICE_FAILURE_WATCHDOG] = "service failed to sent 
> > > > > > > watchdog ping",
> > > > > > "...failed to send..."
> > > > > > 
> > > > > > > +        [SERVICE_FAILURE_START_LIMIT] = "start of the service 
> > > > > > > was attempted too often too quickly",
> > > > > > > +};
> > > > > > > +
> > > > > > > +void log_job_error_with_service_result(const char* service, 
> > > > > > > const char *result) {
> > > > > > > +        bool quotes;
> > > > > > > +        _cleanup_free_ char *stream_buffer = NULL;
> > > > > > > +        _cleanup_fclose_ FILE *stream = NULL;
> > > > > > > +        char *service_maybe_quoted = NULL;
> > > > > > > +        size_t size;
> > > > > > > +        ServiceResult r;
> > > > > > > +
> > > > > > > +        assert(service);
> > > > > > > +        assert(result);
> > > > > > > +
> > > > > > > +        quotes = chars_intersect(service, SHELL_NEED_QUOTES);
> > > > > > > +
> > > > > > > +        service_maybe_quoted = alloca(strlen(service) + 3);
> > > > > > > +        sprintf(service_maybe_quoted, "%s%s%s", quotes ? "'" : 
> > > > > > > "", service, quotes ? "'" : "");
> > > > > > > +
> > > > > > > +        stream = open_memstream(&stream_buffer, &size);
> > > > > > > +        if (!stream)
> > > > > > > +                return;
> > > > > > > +
> > > > > > > +        r = service_result_from_string(result);
> > > > > > > +
> > > > > > > +        assert(r > _SERVICE_RESULT_INVALID && r < 
> > > > > > > _SERVICE_RESULT_MAX);
> > > > > > > +
> > > > > > > +        fprintf(stream, "Job for %s failed because %s. See 
> > > > > > > \"systemctl status %s\" and \"journalctl -xe\" for details.\n",
> > > > > > > +                service,
> > > > > > > +                service_result_error_message_table[r],
> > > > > > > +                service_maybe_quoted);
> > > > > > > +
> > > > > > > +        /* For some results maybe additional explanation is 
> > > > > > > required */
> > > > > > > +        if (streq_ptr(result, "start-limit"))
> > > > > > > +                fprintf(stream, "To force a start please invoke 
> > > > > > > \"systemctl reset-failed %s\" followed by \"systemctl start %s\" 
> > > > > > > again.",
> > > > > > > +                        service_maybe_quoted,
> > > > > > > +                        service_maybe_quoted);
> > > > > > > +
> > > > > > > +        fflush(stream);
> > > > > > > +
> > > > > > > +        log_error("%s", stream_buffer);
> > > > > > 
> > > > > > I don't understand why memstream is needed. Why not just do 
> > > > > > log_error(),
> > > > > > and optionally follow that by log_info() with the additional hint?
> > > > > 
> > > > > I used memstream because I did not want log twice in case that 
> > > > > function is
> > > > > called from context where log target is not tty, i.e. not from 
> > > > > systemctl.
> > > > I don't think this matters. Logging in two steps vs one matters when
> > > > doing structured logging, e.g. to the journal. But when writing to a 
> > > > file,
> > > > no matter if a tty or something else, doing it with one or two calls is 
> > > > pretty
> > > > much the same thing.
> > > 
> > > That is exactly my point. I wanted to allow people to call the function 
> > > from
> > > whatever context they want. I didn't assume *anything* about log target 
> > > and
> > > try to do the right thing regardless what it is.
> > 
> > But this is not a library function that can be called from upredictable 
> > contexts.
> > It is our internal utility, so we can tailor it to our current needs. If it 
> > ever
> > needs to be made more generic, we can do it when the need arises.
> 
> Hmm...But it is in src/shared/log.c which kind of makes it library function 
> (of
> our internal library though), doesn't it?

The rules for internal library functions are different then for public
stuff. Various functions are written in a way that they can only be
called from certain contexts or with certain assumptions, and it is the
job of the person using them to use them properly, or to modify them, taking
care of the whole tree.

Zbyszek
_______________________________________________
systemd-devel mailing list
systemd-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/systemd-devel

Reply via email to