Hi, I know that someone else is about to send a fix for the same issue, but with a different approach. I'd like to wait for that second patch to arrive before we decide which one to merge.
John On 09/02/2017 11:02, Alin Nastac wrote: > SIGKILL is sent if instance process is still running after > <term_timeout> seconds after SIGTERM has been sent. To prevent > another daemon process being launched before old process dies, > the instance is kept until SIGCHLD confirms that service has > been stopped. > > Signed-off-by: Alin Nastac <alin.nas...@gmail.com> > --- > service/instance.c | 44 +++++++++++++++++++++++++++++++++++++------- > service/instance.h | 1 + > service/service.c | 26 ++++++++++++++++---------- > service/service.h | 3 +++ > 4 files changed, 57 insertions(+), 17 deletions(-) > > diff --git a/service/instance.c b/service/instance.c > index 018db3c..4d340fd 100644 > --- a/service/instance.c > +++ b/service/instance.c > @@ -55,6 +55,7 @@ enum { > INSTANCE_ATTR_SECCOMP, > INSTANCE_ATTR_PIDFILE, > INSTANCE_ATTR_RELOADSIG, > + INSTANCE_ATTR_TERMTIMEOUT, > __INSTANCE_ATTR_MAX > }; > > @@ -79,6 +80,7 @@ static const struct blobmsg_policy > instance_attr[__INSTANCE_ATTR_MAX] = { > [INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING }, > [INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING }, > [INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 }, > + [INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 }, > }; > > enum { > @@ -389,8 +391,16 @@ instance_start(struct service_instance *in) > return; > } > > - if (in->proc.pending || !in->command) > + if (!in->command) { > + LOG("Not starting instance %s::%s, command not set\n", > in->srv->name, in->name); > return; > + } > + > + if (in->proc.pending) { > + if (in->halt) > + in->restart = true; > + return; > + } > > instance_free_stdio(in); > if (in->_stdout.fd.fd > -2) { > @@ -408,7 +418,7 @@ instance_start(struct service_instance *in) > } > > in->restart = false; > - in->halt = !in->respawn; > + in->halt = false; > > if (!in->valid) > return; > @@ -494,7 +504,11 @@ instance_timeout(struct uloop_timeout *t) > > in = container_of(t, struct service_instance, 
timeout); > > - if (!in->halt && (in->restart || in->respawn)) > + if (in->halt) { > + LOG("Instance %s::%s pid %d not stopped on SIGTERM, sending > SIGKILL instead\n", > + in->srv->name, in->name, in->proc.pid); > + kill(in->proc.pid, SIGKILL); > + } else if (in->restart || in->respawn) > instance_start(in); > } > > @@ -515,8 +529,19 @@ instance_exit(struct uloop_process *p, int ret) > return; > > uloop_timeout_cancel(&in->timeout); > + service_event("instance.stop", in->srv->name, in->name); > + > if (in->halt) { > instance_removepid(in); > + if (in->restart) > + instance_start(in); > + else { > + struct service *s = in->srv; > + > + avl_delete(&s->instances.avl, &in->node.avl); > + instance_free(in); > + service_stopped(s); > + } > } else if (in->restart) { > instance_start(in); > } else if (in->respawn) { > @@ -535,7 +560,6 @@ instance_exit(struct uloop_process *p, int ret) > uloop_timeout_set(&in->timeout, in->respawn_timeout * > 1000); > } > } > - service_event("instance.stop", in->srv->name, in->name); > } > > void > @@ -546,6 +570,7 @@ instance_stop(struct service_instance *in) > in->halt = true; > in->restart = in->respawn = false; > kill(in->proc.pid, SIGTERM); > + uloop_timeout_set(&in->timeout, in->term_timeout * 1000); > } > > static void > @@ -559,10 +584,10 @@ instance_restart(struct service_instance *in) > return; > } > > - in->halt = false; > + in->halt = true; > in->restart = true; > kill(in->proc.pid, SIGTERM); > - instance_removepid(in); > + uloop_timeout_set(&in->timeout, in->term_timeout * 1000); > } > > static bool > @@ -796,6 +821,8 @@ instance_config_parse(struct service_instance *in) > if (!instance_config_parse_command(in, tb)) > return false; > > + if (tb[INSTANCE_ATTR_TERMTIMEOUT]) > + in->term_timeout = > blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]); > if (tb[INSTANCE_ATTR_RESPAWN]) { > int i = 0; > uint32_t vals[3] = { 3600, 5, 5}; > @@ -933,8 +960,9 @@ instance_update(struct service_instance *in, struct > service_instance *in_new) 
> { > bool changed = instance_config_changed(in, in_new); > bool running = in->proc.pending; > + bool stopping = in->halt; > > - if (!running) { > + if (!running || stopping) { > instance_config_move(in, in_new); > instance_start(in); > } else { > @@ -967,6 +995,7 @@ instance_init(struct service_instance *in, struct service > *s, struct blob_attr * > in->config = config; > in->timeout.cb = instance_timeout; > in->proc.cb = instance_exit; > + in->term_timeout = 5; > > in->_stdout.fd.fd = -2; > in->_stdout.stream.string_data = true; > @@ -999,6 +1028,7 @@ void instance_dump(struct blob_buf *b, struct > service_instance *in, int verbose) > blobmsg_add_u32(b, "pid", in->proc.pid); > if (in->command) > blobmsg_add_blob(b, in->command); > + blobmsg_add_u32(b, "term_timeout", in->term_timeout); > > if (!avl_is_empty(&in->errors.avl)) { > struct blobmsg_list_node *var; > diff --git a/service/instance.h b/service/instance.h > index 3cc2009..78999c8 100644 > --- a/service/instance.h > +++ b/service/instance.h > @@ -59,6 +59,7 @@ struct service_instance { > char *seccomp; > char *pidfile; > > + uint32_t term_timeout; > uint32_t respawn_timeout; > uint32_t respawn_threshold; > uint32_t respawn_retry; > diff --git a/service/service.c b/service/service.c > index 2c73901..0584ee0 100644 > --- a/service/service.c > +++ b/service/service.c > @@ -59,11 +59,10 @@ service_instance_update(struct vlist_tree *tree, struct > vlist_node *node_new, > instance_update(in_o, in_n); > instance_free(in_n); > } else if (in_o) { > - DEBUG(2, "Free instance %s::%s\n", in_o->srv->name, in_o->name); > + DEBUG(2, "Stop instance %s::%s\n", in_o->srv->name, in_o->name); > instance_stop(in_o); > - instance_free(in_o); > } else if (in_n) { > - DEBUG(2, "Create instance %s::%s\n", in_n->srv->name, > in_n->name); > + DEBUG(2, "Start instance %s::%s\n", in_n->srv->name, > in_n->name); > instance_start(in_n); > } > blob_buf_init(&b, 0); > @@ -80,7 +79,7 @@ service_alloc(const char *name) > strcpy(new_name, 
name); > > vlist_init(&s->instances, avl_strcmp, service_instance_update); > - s->instances.keep_old = true; > + s->instances.no_delete = true; > s->name = new_name; > s->avl.key = s->name; > INIT_LIST_HEAD(&s->validators); > @@ -149,13 +148,8 @@ service_update(struct service *s, struct blob_attr **tb, > bool add) > static void > service_delete(struct service *s) > { > - service_event("service.stop", s->name, NULL); > vlist_flush_all(&s->instances); > - avl_delete(&services, &s->avl); > - trigger_del(s); > - free(s->trigger); > - free(s); > - service_validate_del(s); > + service_stopped(s); > } > > enum { > @@ -606,6 +600,18 @@ service_start_early(char *name, char *cmdline) > return service_handle_set(NULL, NULL, NULL, "add", b.head); > } > > +void service_stopped(struct service *s) > +{ > + if (avl_is_empty(&s->instances.avl)) { > + service_event("service.stop", s->name, NULL); > + avl_delete(&services, &s->avl); > + trigger_del(s); > + free(s->trigger); > + free(s); > + service_validate_del(s); > + } > +} > + > void service_event(const char *type, const char *service, const char > *instance) > { > if (!ctx) > diff --git a/service/service.h b/service/service.h > index c3f2964..d4f0a83 100644 > --- a/service/service.h > +++ b/service/service.h > @@ -50,7 +50,10 @@ void service_validate_add(struct service *s, struct > blob_attr *attr); > void service_validate_dump(struct blob_buf *b, struct service *s); > void service_validate_dump_all(struct blob_buf *b, char *p, char *s); > int service_start_early(char *name, char *cmdline); > +void service_stopped(struct service *s); > void service_validate_del(struct service *s); > void service_event(const char *type, const char *service, const char > *instance); > > + > + > #endif > _______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel