James Hunt has proposed merging 
lp:~jamesodhunt/upstart/bug-1447756-the-actual-fix into lp:upstart.

Requested reviews:
  Upstart Reviewers (upstart-reviewers)
Related bugs:
  Bug #1447756 in upstart : "segfault in log.c code causes phone reboot loops"
  https://bugs.launchpad.net/upstart/+bug/1447756

For more details, see:
https://code.launchpad.net/~jamesodhunt/upstart/bug-1447756-the-actual-fix/+merge/259020

* init/log.c:
  - log_clear_unflushed(): Handle logs on the unflushed list which
    get flushed automatically by their corresponding NihIo before
    a request to clear the unflushed list is made. This scenario
    is triggered when a job which has spawned processes exits, but
    where the spawned processes continue to run and then produce
    output before the request to clear the unflushed list is made
    (at the time the log partition becomes writable) but after the
    log partition becomes writable (LP: #1447756).
* init/tests/test_log.c:
  - test_log_new(): Added new test:
    "ensure logger unflushed list ignores already flushed data".
  - test_log_destroy(): Improved test
     "ensure unflushed data moved to unflushed list with uid 0" by giving 
    the log a parent and checking parent handling behaviour.
-- 
Your team Upstart Reviewers is requested to review the proposed merge of 
lp:~jamesodhunt/upstart/bug-1447756-the-actual-fix into lp:upstart.
=== modified file 'ChangeLog'
--- ChangeLog	2015-04-24 12:57:48 +0000
+++ ChangeLog	2015-05-13 15:53:09 +0000
@@ -1,3 +1,21 @@
+2015-05-12  James Hunt  <[email protected]>
+
+	* init/log.c:
+	  - log_clear_unflushed(): Handle logs on the unflushed list which
+	    get flushed automatically by their corresponding NihIo before
+	    a request to clear the unflushed list is made. This scenario
+	    is triggered when a job which has spawned processes exits, but
+	    where the spawned processes continue to run and then produce
+	    output before the request to clear the unflushed list is made
+	    (at the time the log partition becomes writable) but after the
+	    log partition becomes writable (LP: #1447756).
+	* init/tests/test_log.c:
+	  - test_log_new(): Added new test:
+	    "ensure logger unflushed list ignores already flushed data".
+	  - test_log_destroy(): Improved test
+	     "ensure unflushed data moved to unflushed list with uid 0" by giving 
+	    the log a parent and checking parent handling behaviour.
+
 2015-04-24  James Hunt  <[email protected]>
 
 	* init/log.c:

=== modified file 'init/log.c'
--- init/log.c	2015-04-24 12:57:48 +0000
+++ init/log.c	2015-05-13 15:53:09 +0000
@@ -802,6 +802,48 @@
 
 		nih_assert (log);
 
+		if (! log->unflushed->len) {
+			/* The job that originally owned this log has
+			 * exited, but it spawned one or more other
+			 * processes which still live on. If those
+			 * processes produce output, the NihIo will get
+			 * called automatically to flush the data.
+			 * However, that happens asynchronously to
+			 * clearing the unflushed list.
+			 *
+			 * Hence, if no unflushed data remains, remove
+			 * the entry from the list.
+			 *
+			 * Note that doing so frees the log and stops
+			 * any further output being logged from the
+			 * out-of-job process(es). This isn't ideal
+			 * however:
+			 *
+			 * - Better this than having the Log objects remain indefinately.
+			 *
+			 * - The problem is ultimately caused by a rogue
+			 *   job which should be fixed (see the warning
+			 *   that is produced for such jobs in
+			 *   log_read_watch()).
+			 *
+			 * - Even if the Log was retained, it would not
+			 *   be flushed unless multiple calls to this
+			 *   function are made (which is not going to
+			 *   occur by default).
+			 *
+			 * Note that it is necessary to set the io to
+			 * NULL since it has already been freed by
+			 * nih_io_closed(), but the io is not set to
+			 * NULL. If we don't do this, when log_destroy()
+			 * is called, it will attempt to dereference the 
+			 * already freed log->io.
+			 */
+			log->io = NULL;
+			nih_free (elem);
+
+			continue;
+		}
+
 		/* To be added to this list, log should have been
 		 * detached from its parent job.
 		 */

=== modified file 'init/tests/test_log.c'
--- init/tests/test_log.c	2015-04-24 12:57:48 +0000
+++ init/tests/test_log.c	2015-05-13 15:53:09 +0000
@@ -654,6 +654,94 @@
 	TEST_FREE (log);
 
 	/************************************************************/
+	TEST_FEATURE ("ensure logger unflushed list ignores already flushed data");
+
+	/* Reset */
+	log_flushed = 0;
+
+	TEST_EQ (openpty (&pty_master, &pty_slave, NULL, NULL, NULL), 0);
+
+	TEST_GT (sprintf (filename, "%s/test.log", dirname), 0);
+
+	TEST_NE (log_unflushed_files, NULL);
+	TEST_TRUE (NIH_LIST_EMPTY (log_unflushed_files));
+
+	TEST_EQ (stat (dirname, &statbuf), 0);
+
+	/* Save */
+	old_perms = statbuf.st_mode;
+
+	/* Make file inaccessible */
+	TEST_EQ (chmod (dirname, 0x0), 0);
+
+	log = log_new (NULL, filename, pty_master, 0);
+	TEST_NE_P (log, NULL);
+
+	/* Write data to simulate a job process, but _DON'T_ close the
+	 * fd (to simulate the job spawning another process which olds
+	 * the fd open).
+	 */
+	ret = write (pty_slave, str, strlen (str));
+	TEST_GT (ret, 0);
+	ret = write (pty_slave, "\n", 1);
+	TEST_EQ (ret, 1);
+
+	assert0 (log->unflushed->len);
+	TEST_EQ_P (log->unflushed->buf, NULL);
+
+	TEST_NE_P (log->io, NULL);
+
+	TEST_WATCH_UPDATE ();
+
+	/* nih_io_closed() should not have been called */
+	TEST_NE_P (log->io, NULL);
+
+	TEST_EQ_STR (log->unflushed->buf, "hello, world!\r\n");
+
+	/* +2 for '\r\n' */
+	TEST_EQ (log->unflushed->len, 2 + strlen (str));
+
+	/* Ensure no log file written */
+	TEST_LT (stat (filename, &statbuf), 0);
+
+	TEST_TRUE (NIH_LIST_EMPTY (log_unflushed_files));
+
+	TEST_FREE_TAG (log);
+
+	/* Force the usual code path for early boot on a normal system
+	 * when a job produces output before the disks are writable.
+	 */
+	log->open_errno = EROFS;
+
+	TEST_EQ (log_handle_unflushed (NULL, log), 0);
+
+	TEST_FALSE (NIH_LIST_EMPTY (log_unflushed_files));
+
+	/* Restore access */
+	TEST_EQ (chmod (dirname, old_perms), 0);
+
+	TEST_NE_P (log->io, NULL);
+
+	/* Simulate the data having already been flushed to disk
+	 * and nih_io_closed() having been called.
+	 */
+	log->io = NULL;
+	log->unflushed->len = 0;
+	log->unflushed->buf = NULL;
+	log->remote_closed = 1;
+
+	ret = log_clear_unflushed ();
+	TEST_EQ (ret, 0);
+
+	TEST_FREE (log);
+
+	/* Ensure no log file written */
+	TEST_LT (stat (filename, &statbuf), 0);
+
+	/* Tidy up */
+	close (pty_slave);
+	
+	/************************************************************/
 	TEST_FEATURE ("ensure logger flushes when destroyed with uid 0");
 
 	TEST_EQ (openpty (&pty_master, &pty_slave, NULL, NULL, NULL), 0);
@@ -1143,6 +1231,7 @@
 	int   fd;
 	NihListEntry *entry;
 	struct stat  statbuf;
+	char  *p;
 
 	TEST_FUNCTION ("log_destroy");
 
@@ -1227,9 +1316,14 @@
 	TEST_NE (fd, -1);
 	close (fd);
 
-	log = log_new (NULL, filename, pty_master, 0);
+	p = nih_strdup (NULL, "hello");
+	TEST_NE_P (p, NULL);
+
+	log = log_new (p, filename, pty_master, 0);
 	TEST_NE_P (log, NULL);
 
+	TEST_ALLOC_PARENT (log, p);
+
 	ret = write (pty_slave, str, strlen (str));
 	TEST_GT (ret, 0);
 
@@ -1244,16 +1338,26 @@
 	log_flushed = 0;
 
 	TEST_TRUE (NIH_LIST_EMPTY (log_unflushed_files));
-	ret = log_handle_unflushed (NULL, log);
-
+	ret = log_handle_unflushed (p, log);
 	TEST_EQ (ret, 0);
+
 	TEST_FALSE (NIH_LIST_EMPTY (log_unflushed_files));
 
+	TEST_ALLOC_NOT_PARENT (log, p);
+
 	entry = (NihListEntry *)log_unflushed_files->next;
 	TEST_NE_P (entry, NULL);
 	TEST_FREE_TAG (entry);
 	TEST_ALLOC_PARENT (entry, log_unflushed_files);
 
+	/* Free the original parent object and ensure it doesn't free
+	 * the log.
+	 */
+	nih_free (p);
+
+	TEST_FALSE (NIH_LIST_EMPTY (log_unflushed_files));
+	TEST_NOT_FREE (log);
+
 	tmp_log = entry->data;
 	TEST_EQ_P (tmp_log, log);
 	TEST_ALLOC_PARENT (log, entry);

-- 
upstart-devel mailing list
[email protected]
Modify settings or unsubscribe at: 
https://lists.ubuntu.com/mailman/listinfo/upstart-devel

Reply via email to