Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-15 Thread Martin Lau
Yes, I think it should go to both stable and 3.16.

Thanks,
--Martin

On Tue, Jul 15, 2014 at 03:46:12PM -0400, Steven Rostedt wrote:
> On Thu, 10 Jul 2014 15:20:30 -0700
> Martin Lau  wrote:
> 
> > Hi Steve,
> > 
> > Do you have a chance to give it another try?
> > 
> 
> OK, I finally got around to testing it. Yep I see your point. Do you
> think it should go to 3.16 and stable? I can add it to my next git pull
> request.
> 
> Also, I modified your test such that it wouldn't hang on failure, but
> detects that it hung and returns a exit value to add this to my
> testing. That is, it exits with zero on success and non zero on failure.
> 
> -- Steve

> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> static const char * debugfs_list[] = {
>   "/debug/tracing",
>   "/sys/kernel/debug/tracing",
>   "/d/tracing",
>   NULL,
> };
> 
> static const char *debugfs;
> static int markfd;
> static int trace_pipe_fd;
> 
> static void alog(const char *name, int ret)
> {
>   printf("%d: %s: %d\n", getpid(), name, ret);
> }
> 
> static const char *find_debugfs(void)
> {
>   struct stat st;
>   int i;
>   int r;
> 
>   for (i = 0; debugfs_list[i]; i++) {
>   r = stat(debugfs_list[i], );
>   if (r < 0)
>   continue;
>   if (S_ISDIR(st.st_mode))
>   return debugfs_list[i];
>   }
>   return NULL;
> }
> 
> static char * make_path(const char *file)
> {
>   char *path;
>   int size;
> 
>   size = strlen(debugfs) + strlen(file) + 2;
>   path = malloc(size);
>   if (!path) {
>   perror("malloc");
>   exit(-1);
>   }
>   sprintf(path, "%s/%s", debugfs, file);
>   return path;
> }
> 
> static void mark_write(const char *str)
> {
>   int ret;
>   ret = write(markfd, str, strlen(str));
>   alog("write(markfd)", ret);
> }
> 
> static void read_trace_pipe(void)
> {
>   char buf[1024];
>   int r;
> 
>   while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
>   printf("%.*s", r, buf);
> }
> 
> int main (int argc, char **argv)
> {
>   struct epoll_event ee;
>   char *marker;
>   char *pipe;
>   int efd;
>   int ret;
>   pid_t dwrt_pid;
> 
>   debugfs = find_debugfs();
>   if (!debugfs) {
>   fprintf(stderr, "Could not find debugfs\n");
>   exit(-1);
>   }
> 
>   marker = make_path("trace_marker");
>   pipe = make_path("trace_pipe");
> 
>   markfd = open(marker, O_WRONLY);
>   if (markfd < 0) {
>   perror("marker");
>   exit(-1);
>   }
>   trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
>   if (trace_pipe_fd < 0) {
>   perror("trace_pipe");
>   exit(-1);
>   }
> 
>   efd = epoll_create(1);
>   if (efd < 0) {
>   perror("epoll_create");
>   exit(-1);
>   }
> 
>   mark_write("some data");
>   memset(, 0, sizeof(ee));
>   ee.events = EPOLLIN;
>   ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, );
>   if (ret < 0) {
>   perror("epoll_ctl");
>   exit(-1);
>   }
>   alog("waiting data..", 0);
>   ret = epoll_wait(efd, , 1, -1);
>   alog("epoll_wait()", ret);
>   read_trace_pipe();
>   dwrt_pid = fork();
>   assert(dwrt_pid != -1);
>   if (dwrt_pid > 0) {
>   int status;
>   sleep(10);
>   mark_write("more data");
>   sleep(10);
>   ret = waitpid(dwrt_pid, , WNOHANG);
>   if (ret != dwrt_pid) {
>   alog("Poll never finished!", 0);
>   kill(dwrt_pid, 9);
>   exit(-1);
>   }
>   if (WEXITSTATUS(status) != 0) {
>   alog("Something failed on polling!",
>WEXITSTATUS(status));
>   exit(-1);
>   }
>   } else {
>   alog("waiting form more data..", 0);
>   ret = epoll_wait(efd, , 1, -1);
>   alog("epoll_wait()", ret);
>   read_trace_pipe();
>   if (ret < 0)
>   exit(errno);
>   }
>   exit (0);
> }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-15 Thread Steven Rostedt
On Thu, 10 Jul 2014 15:20:30 -0700
Martin Lau  wrote:

> Hi Steve,
> 
> Do you have a chance to give it another try?
> 

OK, I finally got around to testing it. Yep I see your point. Do you
think it should go to 3.16 and stable? I can add it to my next git pull
request.

Also, I modified your test such that it wouldn't hang on failure, but
detects that it hung and returns a exit value to add this to my
testing. That is, it exits with zero on success and non zero on failure.

-- Steve
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static const char * debugfs_list[] = {
	"/debug/tracing",
	"/sys/kernel/debug/tracing",
	"/d/tracing",
	NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static void alog(const char *name, int ret)
{
	printf("%d: %s: %d\n", getpid(), name, ret);
}

static const char *find_debugfs(void)
{
	struct stat st;
	int i;
	int r;

	for (i = 0; debugfs_list[i]; i++) {
		r = stat(debugfs_list[i], );
		if (r < 0)
			continue;
		if (S_ISDIR(st.st_mode))
			return debugfs_list[i];
	}
	return NULL;
}

static char * make_path(const char *file)
{
	char *path;
	int size;

	size = strlen(debugfs) + strlen(file) + 2;
	path = malloc(size);
	if (!path) {
		perror("malloc");
		exit(-1);
	}
	sprintf(path, "%s/%s", debugfs, file);
	return path;
}

static void mark_write(const char *str)
{
	int ret;
	ret = write(markfd, str, strlen(str));
	alog("write(markfd)", ret);
}

static void read_trace_pipe(void)
{
	char buf[1024];
	int r;

	while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
		printf("%.*s", r, buf);
}

int main (int argc, char **argv)
{
	struct epoll_event ee;
	char *marker;
	char *pipe;
	int efd;
	int ret;
	pid_t dwrt_pid;

	debugfs = find_debugfs();
	if (!debugfs) {
		fprintf(stderr, "Could not find debugfs\n");
		exit(-1);
	}

	marker = make_path("trace_marker");
	pipe = make_path("trace_pipe");

	markfd = open(marker, O_WRONLY);
	if (markfd < 0) {
		perror("marker");
		exit(-1);
	}
	trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
	if (trace_pipe_fd < 0) {
		perror("trace_pipe");
		exit(-1);
	}

	efd = epoll_create(1);
	if (efd < 0) {
		perror("epoll_create");
		exit(-1);
	}

	mark_write("some data");
	memset(, 0, sizeof(ee));
	ee.events = EPOLLIN;
	ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, );
	if (ret < 0) {
		perror("epoll_ctl");
		exit(-1);
	}
	alog("waiting data..", 0);
	ret = epoll_wait(efd, , 1, -1);
	alog("epoll_wait()", ret);
	read_trace_pipe();
	dwrt_pid = fork();
	assert(dwrt_pid != -1);
	if (dwrt_pid > 0) {
		int status;
		sleep(10);
		mark_write("more data");
		sleep(10);
		ret = waitpid(dwrt_pid, , WNOHANG);
		if (ret != dwrt_pid) {
			alog("Poll never finished!", 0);
			kill(dwrt_pid, 9);
			exit(-1);
		}
		if (WEXITSTATUS(status) != 0) {
			alog("Something failed on polling!",
 WEXITSTATUS(status));
			exit(-1);
		}
	} else {
		alog("waiting form more data..", 0);
		ret = epoll_wait(efd, , 1, -1);
		alog("epoll_wait()", ret);
		read_trace_pipe();
		if (ret < 0)
			exit(errno);
	}
	exit (0);
}


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-15 Thread Chris Mason


On 06/10/2014 02:06 AM, Martin Lau wrote:
> ring_buffer_poll_wait() should always put the poll_table to its wait_queue
> even there is immediate data available.  Otherwise, the following epoll and
> read sequence will eventually hang forever:
> 
> 1. Put some data to make the trace_pipe ring_buffer read ready first
> 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
> 3. epoll_wait()
> 4. read(trace_pipe_fd) till EAGAIN
> 5. Add some more data to the trace_pipe ring_buffer
> 6. epoll_wait() -> this epoll_wait() will block forever
> 
> ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
>   ring_buffer_poll_wait() returns immediately without adding poll_table,
>   which has poll_table->_qproc pointing to ep_poll_callback(), to its
>   wait_queue.
> ~ During the epoll_wait() call in step 3 and step 6,
>   ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
>   because the poll_table->_qproc is NULL and it is how epoll works.
> ~ When there is new data available in step 6, ring_buffer does not know
>   it has to call ep_poll_callback() because it is not in its wait queue.
>   Hence, block forever.
> 
> Other poll implementation seems to call poll_wait() unconditionally as the 
> very
> first thing to do.  For example, tcp_poll() in tcp.c.

Reviewed-by: Chris Mason 

This looked horribly wrong to me at first, but Martin walked me through
how the polling code is setting up waiters.  We have it in production here.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-15 Thread Chris Mason


On 06/10/2014 02:06 AM, Martin Lau wrote:
 ring_buffer_poll_wait() should always put the poll_table to its wait_queue
 even there is immediate data available.  Otherwise, the following epoll and
 read sequence will eventually hang forever:
 
 1. Put some data to make the trace_pipe ring_buffer read ready first
 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
 3. epoll_wait()
 4. read(trace_pipe_fd) till EAGAIN
 5. Add some more data to the trace_pipe ring_buffer
 6. epoll_wait() - this epoll_wait() will block forever
 
 ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
   ring_buffer_poll_wait() returns immediately without adding poll_table,
   which has poll_table-_qproc pointing to ep_poll_callback(), to its
   wait_queue.
 ~ During the epoll_wait() call in step 3 and step 6,
   ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
   because the poll_table-_qproc is NULL and it is how epoll works.
 ~ When there is new data available in step 6, ring_buffer does not know
   it has to call ep_poll_callback() because it is not in its wait queue.
   Hence, block forever.
 
 Other poll implementation seems to call poll_wait() unconditionally as the 
 very
 first thing to do.  For example, tcp_poll() in tcp.c.

Reviewed-by: Chris Mason c...@fb.com

This looked horribly wrong to me at first, but Martin walked me through
how the polling code is setting up waiters.  We have it in production here.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-15 Thread Steven Rostedt
On Thu, 10 Jul 2014 15:20:30 -0700
Martin Lau ka...@fb.com wrote:

 Hi Steve,
 
 Do you have a chance to give it another try?
 

OK, I finally got around to testing it. Yep I see your point. Do you
think it should go to 3.16 and stable? I can add it to my next git pull
request.

Also, I modified your test such that it wouldn't hang on failure, but
detects that it hung and returns a exit value to add this to my
testing. That is, it exits with zero on success and non zero on failure.

-- Steve
#include stdio.h
#include stdlib.h
#include string.h
#include sys/types.h
#include sys/wait.h
#include sys/stat.h
#include sys/epoll.h
#include fcntl.h
#include unistd.h
#include assert.h
#include signal.h
#include errno.h

static const char * debugfs_list[] = {
	/debug/tracing,
	/sys/kernel/debug/tracing,
	/d/tracing,
	NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static void alog(const char *name, int ret)
{
	printf(%d: %s: %d\n, getpid(), name, ret);
}

static const char *find_debugfs(void)
{
	struct stat st;
	int i;
	int r;

	for (i = 0; debugfs_list[i]; i++) {
		r = stat(debugfs_list[i], st);
		if (r  0)
			continue;
		if (S_ISDIR(st.st_mode))
			return debugfs_list[i];
	}
	return NULL;
}

static char * make_path(const char *file)
{
	char *path;
	int size;

	size = strlen(debugfs) + strlen(file) + 2;
	path = malloc(size);
	if (!path) {
		perror(malloc);
		exit(-1);
	}
	sprintf(path, %s/%s, debugfs, file);
	return path;
}

static void mark_write(const char *str)
{
	int ret;
	ret = write(markfd, str, strlen(str));
	alog(write(markfd), ret);
}

static void read_trace_pipe(void)
{
	char buf[1024];
	int r;

	while ((r = read(trace_pipe_fd, buf, 1024))  0)
		printf(%.*s, r, buf);
}

int main (int argc, char **argv)
{
	struct epoll_event ee;
	char *marker;
	char *pipe;
	int efd;
	int ret;
	pid_t dwrt_pid;

	debugfs = find_debugfs();
	if (!debugfs) {
		fprintf(stderr, Could not find debugfs\n);
		exit(-1);
	}

	marker = make_path(trace_marker);
	pipe = make_path(trace_pipe);

	markfd = open(marker, O_WRONLY);
	if (markfd  0) {
		perror(marker);
		exit(-1);
	}
	trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
	if (trace_pipe_fd  0) {
		perror(trace_pipe);
		exit(-1);
	}

	efd = epoll_create(1);
	if (efd  0) {
		perror(epoll_create);
		exit(-1);
	}

	mark_write(some data);
	memset(ee, 0, sizeof(ee));
	ee.events = EPOLLIN;
	ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee);
	if (ret  0) {
		perror(epoll_ctl);
		exit(-1);
	}
	alog(waiting data.., 0);
	ret = epoll_wait(efd, ee, 1, -1);
	alog(epoll_wait(), ret);
	read_trace_pipe();
	dwrt_pid = fork();
	assert(dwrt_pid != -1);
	if (dwrt_pid  0) {
		int status;
		sleep(10);
		mark_write(more data);
		sleep(10);
		ret = waitpid(dwrt_pid, status, WNOHANG);
		if (ret != dwrt_pid) {
			alog(Poll never finished!, 0);
			kill(dwrt_pid, 9);
			exit(-1);
		}
		if (WEXITSTATUS(status) != 0) {
			alog(Something failed on polling!,
 WEXITSTATUS(status));
			exit(-1);
		}
	} else {
		alog(waiting form more data.., 0);
		ret = epoll_wait(efd, ee, 1, -1);
		alog(epoll_wait(), ret);
		read_trace_pipe();
		if (ret  0)
			exit(errno);
	}
	exit (0);
}


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-15 Thread Martin Lau
Yes, I think it should go to both stable and 3.16.

Thanks,
--Martin

On Tue, Jul 15, 2014 at 03:46:12PM -0400, Steven Rostedt wrote:
 On Thu, 10 Jul 2014 15:20:30 -0700
 Martin Lau ka...@fb.com wrote:
 
  Hi Steve,
  
  Do you have a chance to give it another try?
  
 
 OK, I finally got around to testing it. Yep I see your point. Do you
 think it should go to 3.16 and stable? I can add it to my next git pull
 request.
 
 Also, I modified your test such that it wouldn't hang on failure, but
 detects that it hung and returns a exit value to add this to my
 testing. That is, it exits with zero on success and non zero on failure.
 
 -- Steve

 #include stdio.h
 #include stdlib.h
 #include string.h
 #include sys/types.h
 #include sys/wait.h
 #include sys/stat.h
 #include sys/epoll.h
 #include fcntl.h
 #include unistd.h
 #include assert.h
 #include signal.h
 #include errno.h
 
 static const char * debugfs_list[] = {
   /debug/tracing,
   /sys/kernel/debug/tracing,
   /d/tracing,
   NULL,
 };
 
 static const char *debugfs;
 static int markfd;
 static int trace_pipe_fd;
 
 static void alog(const char *name, int ret)
 {
   printf(%d: %s: %d\n, getpid(), name, ret);
 }
 
 static const char *find_debugfs(void)
 {
   struct stat st;
   int i;
   int r;
 
   for (i = 0; debugfs_list[i]; i++) {
   r = stat(debugfs_list[i], st);
   if (r  0)
   continue;
   if (S_ISDIR(st.st_mode))
   return debugfs_list[i];
   }
   return NULL;
 }
 
 static char * make_path(const char *file)
 {
   char *path;
   int size;
 
   size = strlen(debugfs) + strlen(file) + 2;
   path = malloc(size);
   if (!path) {
   perror(malloc);
   exit(-1);
   }
   sprintf(path, %s/%s, debugfs, file);
   return path;
 }
 
 static void mark_write(const char *str)
 {
   int ret;
   ret = write(markfd, str, strlen(str));
   alog(write(markfd), ret);
 }
 
 static void read_trace_pipe(void)
 {
   char buf[1024];
   int r;
 
   while ((r = read(trace_pipe_fd, buf, 1024))  0)
   printf(%.*s, r, buf);
 }
 
 int main (int argc, char **argv)
 {
   struct epoll_event ee;
   char *marker;
   char *pipe;
   int efd;
   int ret;
   pid_t dwrt_pid;
 
   debugfs = find_debugfs();
   if (!debugfs) {
   fprintf(stderr, Could not find debugfs\n);
   exit(-1);
   }
 
   marker = make_path(trace_marker);
   pipe = make_path(trace_pipe);
 
   markfd = open(marker, O_WRONLY);
   if (markfd  0) {
   perror(marker);
   exit(-1);
   }
   trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
   if (trace_pipe_fd  0) {
   perror(trace_pipe);
   exit(-1);
   }
 
   efd = epoll_create(1);
   if (efd  0) {
   perror(epoll_create);
   exit(-1);
   }
 
   mark_write(some data);
   memset(ee, 0, sizeof(ee));
   ee.events = EPOLLIN;
   ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee);
   if (ret  0) {
   perror(epoll_ctl);
   exit(-1);
   }
   alog(waiting data.., 0);
   ret = epoll_wait(efd, ee, 1, -1);
   alog(epoll_wait(), ret);
   read_trace_pipe();
   dwrt_pid = fork();
   assert(dwrt_pid != -1);
   if (dwrt_pid  0) {
   int status;
   sleep(10);
   mark_write(more data);
   sleep(10);
   ret = waitpid(dwrt_pid, status, WNOHANG);
   if (ret != dwrt_pid) {
   alog(Poll never finished!, 0);
   kill(dwrt_pid, 9);
   exit(-1);
   }
   if (WEXITSTATUS(status) != 0) {
   alog(Something failed on polling!,
WEXITSTATUS(status));
   exit(-1);
   }
   } else {
   alog(waiting form more data.., 0);
   ret = epoll_wait(efd, ee, 1, -1);
   alog(epoll_wait(), ret);
   read_trace_pipe();
   if (ret  0)
   exit(errno);
   }
   exit (0);
 }

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-10 Thread Martin Lau
Hi Steve,

Do you have a chance to give it another try?

Thanks,
Martin

On Thu, Jun 26, 2014 at 11:52:44PM -0700, Martin Lau wrote:
> Hi Steve,
> 
> I retried the test program (with the kernel patch).  It does block from
> time to time.  I spotted the ee.events is not set to EPOLLIN before
> calling epll_ctl(eft, EPOLL_CTL_ADD,...).  I fixed it and ran
> the test in a bash-loop.
> 
> I have the kafai-2 version attached (with some more logging in case
> if it still blocks).
> 
> Can you retry?
> 
> Thanks,
> Martin
> 
> On Thu, Jun 26, 2014 at 08:53:27PM -0400, Steven Rostedt wrote:
> > On Thu, 26 Jun 2014 11:34:46 -0700
> > Martin Lau  wrote:
> > 
> > > Hi Steve,
> > > 
> > > Can the modified test program reproduce the problem in your test setup?
> > 
> > Ah sorry, got distracted by other work.
> > 
> > OK, I just tried it out, and here's my results:
> > 
> > I ran you code with my current kernel and this is what I got:
> > 
> > # ./ftrace-test-epoll-kafai 
> ><...>-3183  [002] ...181.777891: tracing_mark_write:
> > some data 3183: waitting for more data..
> > 3184: written more data
> > 
> > And it just hung there.
> > 
> > 
> > Then I applied your patch, compiled and booted it, and ran the test
> > again, and I got this:
> > 
> > # ./ftrace-test-epoll-kafai
> > 
> > It just hung there. No forward progress.
> > 
> > I don't think that was the result you intended.
> > 
> > -- Steve
> > 
> > 

> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> static const char * debugfs_list[] = {
>   "/debug/tracing",
>   "/sys/kernel/debug/tracing",
>   "/d/tracing",
>   NULL,
> };
> 
> static const char *debugfs;
> static int markfd;
> static int trace_pipe_fd;
> 
> static void alog(const char *name, int ret)
> {
>   printf("%d: %s: %d\n", getpid(), name, ret);
> }
> 
> static const char *find_debugfs(void)
> {
>   struct stat st;
>   int i;
>   int r;
> 
>   for (i = 0; debugfs_list[i]; i++) {
>   r = stat(debugfs_list[i], );
>   if (r < 0)
>   continue;
>   if (S_ISDIR(st.st_mode))
>   return debugfs_list[i];
>   }
>   return NULL;
> }
> 
> static char * make_path(const char *file)
> {
>   char *path;
>   int size;
> 
>   size = strlen(debugfs) + strlen(file) + 2;
>   path = malloc(size);
>   if (!path) {
>   perror("malloc");
>   exit(-1);
>   }
>   sprintf(path, "%s/%s", debugfs, file);
>   return path;
> }
> 
> static void mark_write(const char *str)
> {
>   int ret;
>   ret = write(markfd, str, strlen(str));
>   alog("write(markfd)", ret);
> }
> 
> static void read_trace_pipe(void)
> {
>   char buf[1024];
>   int r;
> 
>   while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
>   printf("%.*s", r, buf);
> }
> 
> int main (int argc, char **argv)
> {
>   struct epoll_event ee;
>   char *marker;
>   char *pipe;
>   int efd;
>   int ret;
>   pid_t dwrt_pid;
> 
>   debugfs = find_debugfs();
>   if (!debugfs) {
>   fprintf(stderr, "Could not find debugfs\n");
>   exit(-1);
>   }
> 
>   marker = make_path("trace_marker");
>   pipe = make_path("trace_pipe");
> 
>   markfd = open(marker, O_WRONLY);
>   if (markfd < 0) {
>   perror("marker");
>   exit(-1);
>   }
>   trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
>   if (trace_pipe_fd < 0) {
>   perror("trace_pipe");
>   exit(-1);
>   }
> 
>   efd = epoll_create(1);
>   if (efd < 0) {
>   perror("epoll_create");
>   exit(-1);
>   }
> 
>   mark_write("some data");
>   memset(, 0, sizeof(ee));
>   ee.events = EPOLLIN;
>   ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, );
>   if (ret < 0) {
>   perror("epoll_ctl");
>   exit(-1);
>   }
>   alog("waiting data..", 0);
>   ret = epoll_wait(efd, , 1, -1);
>   alog("epoll_wait()", ret);
>   read_trace_pipe();
>   dwrt_pid = fork();
>   assert(dwrt_pid != -1);
>   if (dwrt_pid == 0) {
>   sleep(10);
>   mark_write("more data");
>   } else {
>   alog("waiting form more data..", 0);
>   ret = epoll_wait(efd, , 1, -1);
>   alog("epoll_wait()", ret);
>   read_trace_pipe();
>   wait(NULL);
>   }
>   return 0;
> }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-07-10 Thread Martin Lau
Hi Steve,

Do you have a chance to give it another try?

Thanks,
Martin

On Thu, Jun 26, 2014 at 11:52:44PM -0700, Martin Lau wrote:
 Hi Steve,
 
 I retried the test program (with the kernel patch).  It does block from
 time to time.  I spotted the ee.events is not set to EPOLLIN before
 calling epll_ctl(eft, EPOLL_CTL_ADD,...).  I fixed it and ran
 the test in a bash-loop.
 
 I have the kafai-2 version attached (with some more logging in case
 if it still blocks).
 
 Can you retry?
 
 Thanks,
 Martin
 
 On Thu, Jun 26, 2014 at 08:53:27PM -0400, Steven Rostedt wrote:
  On Thu, 26 Jun 2014 11:34:46 -0700
  Martin Lau ka...@fb.com wrote:
  
   Hi Steve,
   
   Can the modified test program reproduce the problem in your test setup?
  
  Ah sorry, got distracted by other work.
  
  OK, I just tried it out, and here's my results:
  
  I ran you code with my current kernel and this is what I got:
  
  # ./ftrace-test-epoll-kafai 
 ...-3183  [002] ...181.777891: tracing_mark_write:
  some data 3183: waitting for more data..
  3184: written more data
  
  And it just hung there.
  
  
  Then I applied your patch, compiled and booted it, and ran the test
  again, and I got this:
  
  # ./ftrace-test-epoll-kafai
  
  It just hung there. No forward progress.
  
  I don't think that was the result you intended.
  
  -- Steve
  
  

 #include stdio.h
 #include stdlib.h
 #include string.h
 #include sys/types.h
 #include sys/stat.h
 #include sys/epoll.h
 #include fcntl.h
 #include unistd.h
 #include assert.h
 
 static const char * debugfs_list[] = {
   /debug/tracing,
   /sys/kernel/debug/tracing,
   /d/tracing,
   NULL,
 };
 
 static const char *debugfs;
 static int markfd;
 static int trace_pipe_fd;
 
 static void alog(const char *name, int ret)
 {
   printf(%d: %s: %d\n, getpid(), name, ret);
 }
 
 static const char *find_debugfs(void)
 {
   struct stat st;
   int i;
   int r;
 
   for (i = 0; debugfs_list[i]; i++) {
   r = stat(debugfs_list[i], st);
   if (r  0)
   continue;
   if (S_ISDIR(st.st_mode))
   return debugfs_list[i];
   }
   return NULL;
 }
 
 static char * make_path(const char *file)
 {
   char *path;
   int size;
 
   size = strlen(debugfs) + strlen(file) + 2;
   path = malloc(size);
   if (!path) {
   perror(malloc);
   exit(-1);
   }
   sprintf(path, %s/%s, debugfs, file);
   return path;
 }
 
 static void mark_write(const char *str)
 {
   int ret;
   ret = write(markfd, str, strlen(str));
   alog(write(markfd), ret);
 }
 
 static void read_trace_pipe(void)
 {
   char buf[1024];
   int r;
 
   while ((r = read(trace_pipe_fd, buf, 1024))  0)
   printf(%.*s, r, buf);
 }
 
 int main (int argc, char **argv)
 {
   struct epoll_event ee;
   char *marker;
   char *pipe;
   int efd;
   int ret;
   pid_t dwrt_pid;
 
   debugfs = find_debugfs();
   if (!debugfs) {
   fprintf(stderr, Could not find debugfs\n);
   exit(-1);
   }
 
   marker = make_path(trace_marker);
   pipe = make_path(trace_pipe);
 
   markfd = open(marker, O_WRONLY);
   if (markfd  0) {
   perror(marker);
   exit(-1);
   }
   trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
   if (trace_pipe_fd  0) {
   perror(trace_pipe);
   exit(-1);
   }
 
   efd = epoll_create(1);
   if (efd  0) {
   perror(epoll_create);
   exit(-1);
   }
 
   mark_write(some data);
   memset(ee, 0, sizeof(ee));
   ee.events = EPOLLIN;
   ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee);
   if (ret  0) {
   perror(epoll_ctl);
   exit(-1);
   }
   alog(waiting data.., 0);
   ret = epoll_wait(efd, ee, 1, -1);
   alog(epoll_wait(), ret);
   read_trace_pipe();
   dwrt_pid = fork();
   assert(dwrt_pid != -1);
   if (dwrt_pid == 0) {
   sleep(10);
   mark_write(more data);
   } else {
   alog(waiting form more data.., 0);
   ret = epoll_wait(efd, ee, 1, -1);
   alog(epoll_wait(), ret);
   read_trace_pipe();
   wait(NULL);
   }
   return 0;
 }

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-27 Thread Martin Lau
Hi Steve,

I retried the test program (with the kernel patch).  It does block from
time to time.  I spotted the ee.events is not set to EPOLLIN before
calling epll_ctl(eft, EPOLL_CTL_ADD,...).  I fixed it and ran
the test in a bash-loop.

I have the kafai-2 version attached (with some more logging in case
if it still blocks).

Can you retry?

Thanks,
Martin

On Thu, Jun 26, 2014 at 08:53:27PM -0400, Steven Rostedt wrote:
> On Thu, 26 Jun 2014 11:34:46 -0700
> Martin Lau  wrote:
> 
> > Hi Steve,
> > 
> > Can the modified test program reproduce the problem in your test setup?
> 
> Ah sorry, got distracted by other work.
> 
> OK, I just tried it out, and here's my results:
> 
> I ran you code with my current kernel and this is what I got:
> 
> # ./ftrace-test-epoll-kafai 
><...>-3183  [002] ...181.777891: tracing_mark_write:
> some data 3183: waitting for more data..
> 3184: written more data
> 
> And it just hung there.
> 
> 
> Then I applied your patch, compiled and booted it, and ran the test
> again, and I got this:
> 
> # ./ftrace-test-epoll-kafai
> 
> It just hung there. No forward progress.
> 
> I don't think that was the result you intended.
> 
> -- Steve
> 
> 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static const char * debugfs_list[] = {
"/debug/tracing",
"/sys/kernel/debug/tracing",
"/d/tracing",
NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static void alog(const char *name, int ret)
{
printf("%d: %s: %d\n", getpid(), name, ret);
}

static const char *find_debugfs(void)
{
struct stat st;
int i;
int r;

for (i = 0; debugfs_list[i]; i++) {
r = stat(debugfs_list[i], );
if (r < 0)
continue;
if (S_ISDIR(st.st_mode))
return debugfs_list[i];
}
return NULL;
}

static char * make_path(const char *file)
{
char *path;
int size;

size = strlen(debugfs) + strlen(file) + 2;
path = malloc(size);
if (!path) {
perror("malloc");
exit(-1);
}
sprintf(path, "%s/%s", debugfs, file);
return path;
}

static void mark_write(const char *str)
{
int ret;
ret = write(markfd, str, strlen(str));
alog("write(markfd)", ret);
}

static void read_trace_pipe(void)
{
char buf[1024];
int r;

while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
printf("%.*s", r, buf);
}

int main (int argc, char **argv)
{
struct epoll_event ee;
char *marker;
char *pipe;
int efd;
int ret;
pid_t dwrt_pid;

debugfs = find_debugfs();
if (!debugfs) {
fprintf(stderr, "Could not find debugfs\n");
exit(-1);
}

marker = make_path("trace_marker");
pipe = make_path("trace_pipe");

markfd = open(marker, O_WRONLY);
if (markfd < 0) {
perror("marker");
exit(-1);
}
trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
if (trace_pipe_fd < 0) {
perror("trace_pipe");
exit(-1);
}

efd = epoll_create(1);
if (efd < 0) {
perror("epoll_create");
exit(-1);
}

mark_write("some data");
memset(, 0, sizeof(ee));
ee.events = EPOLLIN;
ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, );
if (ret < 0) {
perror("epoll_ctl");
exit(-1);
}
alog("waiting data..", 0);
ret = epoll_wait(efd, , 1, -1);
alog("epoll_wait()", ret);
read_trace_pipe();
dwrt_pid = fork();
assert(dwrt_pid != -1);
if (dwrt_pid == 0) {
sleep(10);
mark_write("more data");
} else {
alog("waiting form more data..", 0);
ret = epoll_wait(efd, , 1, -1);
alog("epoll_wait()", ret);
read_trace_pipe();
wait(NULL);
}
return 0;
}


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-27 Thread Martin Lau
Hi Steve,

I retried the test program (with the kernel patch).  It does block from
time to time.  I spotted the ee.events is not set to EPOLLIN before
calling epll_ctl(eft, EPOLL_CTL_ADD,...).  I fixed it and ran
the test in a bash-loop.

I have the kafai-2 version attached (with some more logging in case
if it still blocks).

Can you retry?

Thanks,
Martin

On Thu, Jun 26, 2014 at 08:53:27PM -0400, Steven Rostedt wrote:
 On Thu, 26 Jun 2014 11:34:46 -0700
 Martin Lau ka...@fb.com wrote:
 
  Hi Steve,
  
  Can the modified test program reproduce the problem in your test setup?
 
 Ah sorry, got distracted by other work.
 
 OK, I just tried it out, and here's my results:
 
 I ran you code with my current kernel and this is what I got:
 
 # ./ftrace-test-epoll-kafai 
...-3183  [002] ...181.777891: tracing_mark_write:
 some data 3183: waitting for more data..
 3184: written more data
 
 And it just hung there.
 
 
 Then I applied your patch, compiled and booted it, and ran the test
 again, and I got this:
 
 # ./ftrace-test-epoll-kafai
 
 It just hung there. No forward progress.
 
 I don't think that was the result you intended.
 
 -- Steve
 
 
#include stdio.h
#include stdlib.h
#include string.h
#include sys/types.h
#include sys/stat.h
#include sys/epoll.h
#include fcntl.h
#include unistd.h
#include assert.h

static const char * debugfs_list[] = {
/debug/tracing,
/sys/kernel/debug/tracing,
/d/tracing,
NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static void alog(const char *name, int ret)
{
printf(%d: %s: %d\n, getpid(), name, ret);
}

static const char *find_debugfs(void)
{
struct stat st;
int i;
int r;

for (i = 0; debugfs_list[i]; i++) {
r = stat(debugfs_list[i], st);
if (r  0)
continue;
if (S_ISDIR(st.st_mode))
return debugfs_list[i];
}
return NULL;
}

static char * make_path(const char *file)
{
char *path;
int size;

size = strlen(debugfs) + strlen(file) + 2;
path = malloc(size);
if (!path) {
perror(malloc);
exit(-1);
}
sprintf(path, %s/%s, debugfs, file);
return path;
}

static void mark_write(const char *str)
{
int ret;
ret = write(markfd, str, strlen(str));
alog(write(markfd), ret);
}

static void read_trace_pipe(void)
{
char buf[1024];
int r;

while ((r = read(trace_pipe_fd, buf, 1024))  0)
printf(%.*s, r, buf);
}

int main (int argc, char **argv)
{
struct epoll_event ee;
char *marker;
char *pipe;
int efd;
int ret;
pid_t dwrt_pid;

debugfs = find_debugfs();
if (!debugfs) {
fprintf(stderr, Could not find debugfs\n);
exit(-1);
}

marker = make_path(trace_marker);
pipe = make_path(trace_pipe);

markfd = open(marker, O_WRONLY);
if (markfd  0) {
perror(marker);
exit(-1);
}
trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
if (trace_pipe_fd  0) {
perror(trace_pipe);
exit(-1);
}

efd = epoll_create(1);
if (efd  0) {
perror(epoll_create);
exit(-1);
}

mark_write(some data);
memset(ee, 0, sizeof(ee));
ee.events = EPOLLIN;
ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee);
if (ret  0) {
perror(epoll_ctl);
exit(-1);
}
alog(waiting data.., 0);
ret = epoll_wait(efd, ee, 1, -1);
alog(epoll_wait(), ret);
read_trace_pipe();
dwrt_pid = fork();
assert(dwrt_pid != -1);
if (dwrt_pid == 0) {
sleep(10);
mark_write(more data);
} else {
alog(waiting form more data.., 0);
ret = epoll_wait(efd, ee, 1, -1);
alog(epoll_wait(), ret);
read_trace_pipe();
wait(NULL);
}
return 0;
}


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-26 Thread Steven Rostedt
On Thu, 26 Jun 2014 11:34:46 -0700
Martin Lau  wrote:

> Hi Steve,
> 
> Can the modified test program reproduce the problem in your test setup?

Ah sorry, got distracted by other work.

OK, I just tried it out, and here's my results:

I ran you code with my current kernel and this is what I got:

# ./ftrace-test-epoll-kafai 
   <...>-3183  [002] ...181.777891: tracing_mark_write:
some data 3183: waitting for more data..
3184: written more data

And it just hung there.


Then I applied your patch, compiled and booted it, and ran the test
again, and I got this:

# ./ftrace-test-epoll-kafai

It just hung there. No forward progress.

I don't think that was the result you intended.

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-26 Thread Martin Lau
Hi Steve,

Can the modified test program reproduce the problem in your test setup?

Thanks,
--Martin

On Tue, Jun 10, 2014 at 10:58:14PM -0700, Martin Lau wrote:
> Hi Steve,
> 
> Attached is the modified test program.  Here is the sample output:
> 
> localhost ~ # ./ftrace-test-epoll-kafai
><...>-1857  [000] ...1   720.174295: tracing_mark_write: some data
> 1857: waitting for more data..
> 1858: written more data
> 
> 
> Thanks,
> --Martin
> 
> On Tue, Jun 10, 2014 at 11:49:15AM -0400, Steven Rostedt wrote:
> > On Mon, 9 Jun 2014 23:06:42 -0700
> > Martin Lau  wrote:
> > 
> > > ring_buffer_poll_wait() should always put the poll_table to its wait_queue
> > > even there is immediate data available.  Otherwise, the following epoll 
> > > and
> > > read sequence will eventually hang forever:
> > > 
> > > 1. Put some data to make the trace_pipe ring_buffer read ready first
> > > 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
> > > 3. epoll_wait()
> > > 4. read(trace_pipe_fd) till EAGAIN
> > > 5. Add some more data to the trace_pipe ring_buffer
> > > 6. epoll_wait() -> this epoll_wait() will block forever
> > > 
> > > ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
> > >   ring_buffer_poll_wait() returns immediately without adding poll_table,
> > >   which has poll_table->_qproc pointing to ep_poll_callback(), to its
> > >   wait_queue.
> > > ~ During the epoll_wait() call in step 3 and step 6,
> > >   ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
> > >   because the poll_table->_qproc is NULL and it is how epoll works.
> > > ~ When there is new data available in step 6, ring_buffer does not know
> > >   it has to call ep_poll_callback() because it is not in its wait queue.
> > >   Hence, block forever.
> > > 
> > > Other poll implementation seems to call poll_wait() unconditionally as 
> > > the very
> > > first thing to do.  For example, tcp_poll() in tcp.c.
> > 
> > I'm trying to see the effect of this bug, but can't seem to reproduce
> > it. Maybe I did something wrong. Attached is a test program I wrote
> > trying to follow your instructions. I don't use epoll, so perhaps I
> > didn't use it correctly.
> > 
> > Can you modify it to show me the problem this is trying to fix. That
> > is, without this patch it hangs, but with the patch it does not.
> > 
> > Thanks!
> > 
> > -- Steve
> > 
> > > 
> > > Signed-off-by: Martin Lau 
> > > ---
> > >  kernel/trace/ring_buffer.c | 4 
> > >  1 file changed, 4 deletions(-)
> > > 
> > > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> > > index fd12cc5..a6e64e8 100644
> > > --- a/kernel/trace/ring_buffer.c
> > > +++ b/kernel/trace/ring_buffer.c
> > > @@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer 
> > > *buffer, int cpu,
> > >   struct ring_buffer_per_cpu *cpu_buffer;
> > >   struct rb_irq_work *work;
> > >  
> > > - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
> > > - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, 
> > > cpu)))
> > > - return POLLIN | POLLRDNORM;
> > > -
> > >   if (cpu == RING_BUFFER_ALL_CPUS)
> > >   work = >irq_work;
> > >   else {
> > 
> 

> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> static const char * debugfs_list[] = {
>   "/debug/tracing",
>   "/sys/kernel/debug/tracing",
>   "/d/tracing",
>   NULL,
> };
> 
> static const char *debugfs;
> static int markfd;
> static int trace_pipe_fd;
> 
> static const char *find_debugfs(void)
> {
>   struct stat st;
>   int i;
>   int r;
> 
>   for (i = 0; debugfs_list[i]; i++) {
>   r = stat(debugfs_list[i], );
>   if (r < 0)
>   continue;
>   if (S_ISDIR(st.st_mode))
>   return debugfs_list[i];
>   }
>   return NULL;
> }
> 
> static char * make_path(const char *file)
> {
>   char *path;
>   int size;
> 
>   size = strlen(debugfs) + strlen(file) + 2;
>   path = malloc(size);
>   if (!path) {
>   perror("malloc");
>   exit(-1);
>   }
>   sprintf(path, "%s/%s", debugfs, file);
>   return path;
> }
> 
> static void mark_write(const char *str)
> {
>   write(markfd, str, strlen(str));
> }
> 
> static void read_trace_pipe(void)
> {
>   char buf[1024];
>   int r;
> 
>   while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
>   printf("%.*s", r, buf);
> }
> 
> int main (int argc, char **argv)
> {
>   struct epoll_event ee;
>   char *marker;
>   char *pipe;
>   int efd;
>   int ret;
>   pid_t dwrt_pid;
> 
>   debugfs = find_debugfs();
>   if (!debugfs) {
>   fprintf(stderr, "Could not find debugfs\n");
>   exit(-1);
>   }
> 
>   marker = make_path("trace_marker");
>   pipe = make_path("trace_pipe");
> 
>   markfd = 

Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-26 Thread Martin Lau
Hi Steve,

Can the modified test program reproduce the problem in your test setup?

Thanks,
--Martin

On Tue, Jun 10, 2014 at 10:58:14PM -0700, Martin Lau wrote:
 Hi Steve,
 
 Attached is the modified test program.  Here is the sample output:
 
 localhost ~ # ./ftrace-test-epoll-kafai
...-1857  [000] ...1   720.174295: tracing_mark_write: some data
 1857: waitting for more data..
 1858: written more data
 block here
 
 Thanks,
 --Martin
 
 On Tue, Jun 10, 2014 at 11:49:15AM -0400, Steven Rostedt wrote:
  On Mon, 9 Jun 2014 23:06:42 -0700
  Martin Lau ka...@fb.com wrote:
  
   ring_buffer_poll_wait() should always put the poll_table to its wait_queue
   even there is immediate data available.  Otherwise, the following epoll 
   and
   read sequence will eventually hang forever:
   
   1. Put some data to make the trace_pipe ring_buffer read ready first
   2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
   3. epoll_wait()
   4. read(trace_pipe_fd) till EAGAIN
   5. Add some more data to the trace_pipe ring_buffer
   6. epoll_wait() - this epoll_wait() will block forever
   
   ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
 ring_buffer_poll_wait() returns immediately without adding poll_table,
 which has poll_table-_qproc pointing to ep_poll_callback(), to its
 wait_queue.
   ~ During the epoll_wait() call in step 3 and step 6,
 ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
 because the poll_table-_qproc is NULL and it is how epoll works.
   ~ When there is new data available in step 6, ring_buffer does not know
 it has to call ep_poll_callback() because it is not in its wait queue.
 Hence, block forever.
   
   Other poll implementation seems to call poll_wait() unconditionally as 
   the very
   first thing to do.  For example, tcp_poll() in tcp.c.
  
  I'm trying to see the effect of this bug, but can't seem to reproduce
  it. Maybe I did something wrong. Attached is a test program I wrote
  trying to follow your instructions. I don't use epoll, so perhaps I
  didn't use it correctly.
  
  Can you modify it to show me the problem this is trying to fix. That
  is, without this patch it hangs, but with the patch it does not.
  
  Thanks!
  
  -- Steve
  
   
   Signed-off-by: Martin Lau ka...@fb.com
   ---
kernel/trace/ring_buffer.c | 4 
1 file changed, 4 deletions(-)
   
   diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
   index fd12cc5..a6e64e8 100644
   --- a/kernel/trace/ring_buffer.c
   +++ b/kernel/trace/ring_buffer.c
   @@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer 
   *buffer, int cpu,
 struct ring_buffer_per_cpu *cpu_buffer;
 struct rb_irq_work *work;

   - if ((cpu == RING_BUFFER_ALL_CPUS  !ring_buffer_empty(buffer)) ||
   - (cpu != RING_BUFFER_ALL_CPUS  !ring_buffer_empty_cpu(buffer, 
   cpu)))
   - return POLLIN | POLLRDNORM;
   -
 if (cpu == RING_BUFFER_ALL_CPUS)
 work = buffer-irq_work;
 else {
  
 

 #include stdio.h
 #include stdlib.h
 #include string.h
 #include sys/types.h
 #include sys/stat.h
 #include sys/epoll.h
 #include fcntl.h
 #include unistd.h
 #include assert.h
 
 static const char * debugfs_list[] = {
   /debug/tracing,
   /sys/kernel/debug/tracing,
   /d/tracing,
   NULL,
 };
 
 static const char *debugfs;
 static int markfd;
 static int trace_pipe_fd;
 
 static const char *find_debugfs(void)
 {
   struct stat st;
   int i;
   int r;
 
   for (i = 0; debugfs_list[i]; i++) {
   r = stat(debugfs_list[i], st);
   if (r  0)
   continue;
   if (S_ISDIR(st.st_mode))
   return debugfs_list[i];
   }
   return NULL;
 }
 
 static char * make_path(const char *file)
 {
   char *path;
   int size;
 
   size = strlen(debugfs) + strlen(file) + 2;
   path = malloc(size);
   if (!path) {
   perror(malloc);
   exit(-1);
   }
   sprintf(path, %s/%s, debugfs, file);
   return path;
 }
 
 static void mark_write(const char *str)
 {
   write(markfd, str, strlen(str));
 }
 
 static void read_trace_pipe(void)
 {
   char buf[1024];
   int r;
 
   while ((r = read(trace_pipe_fd, buf, 1024))  0)
   printf(%.*s, r, buf);
 }
 
 int main (int argc, char **argv)
 {
   struct epoll_event ee;
   char *marker;
   char *pipe;
   int efd;
   int ret;
   pid_t dwrt_pid;
 
   debugfs = find_debugfs();
   if (!debugfs) {
   fprintf(stderr, Could not find debugfs\n);
   exit(-1);
   }
 
   marker = make_path(trace_marker);
   pipe = make_path(trace_pipe);
 
   markfd = open(marker, O_WRONLY);
   if (markfd  0) {
   perror(marker);
   exit(-1);
   }
   trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
   if (trace_pipe_fd  0) {
  

Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-26 Thread Steven Rostedt
On Thu, 26 Jun 2014 11:34:46 -0700
Martin Lau ka...@fb.com wrote:

 Hi Steve,
 
 Can the modified test program reproduce the problem in your test setup?

Ah sorry, got distracted by other work.

OK, I just tried it out, and here's my results:

I ran you code with my current kernel and this is what I got:

# ./ftrace-test-epoll-kafai 
   ...-3183  [002] ...181.777891: tracing_mark_write:
some data 3183: waitting for more data..
3184: written more data

And it just hung there.


Then I applied your patch, compiled and booted it, and ran the test
again, and I got this:

# ./ftrace-test-epoll-kafai

It just hung there. No forward progress.

I don't think that was the result you intended.

-- Steve


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-10 Thread Martin Lau
Hi Steve,

Attached is the modified test program.  Here is the sample output:

localhost ~ # ./ftrace-test-epoll-kafai
   <...>-1857  [000] ...1   720.174295: tracing_mark_write: some data
1857: waitting for more data..
1858: written more data


Thanks,
--Martin

On Tue, Jun 10, 2014 at 11:49:15AM -0400, Steven Rostedt wrote:
> On Mon, 9 Jun 2014 23:06:42 -0700
> Martin Lau  wrote:
> 
> > ring_buffer_poll_wait() should always put the poll_table to its wait_queue
> > even there is immediate data available.  Otherwise, the following epoll and
> > read sequence will eventually hang forever:
> > 
> > 1. Put some data to make the trace_pipe ring_buffer read ready first
> > 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
> > 3. epoll_wait()
> > 4. read(trace_pipe_fd) till EAGAIN
> > 5. Add some more data to the trace_pipe ring_buffer
> > 6. epoll_wait() -> this epoll_wait() will block forever
> > 
> > ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
> >   ring_buffer_poll_wait() returns immediately without adding poll_table,
> >   which has poll_table->_qproc pointing to ep_poll_callback(), to its
> >   wait_queue.
> > ~ During the epoll_wait() call in step 3 and step 6,
> >   ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
> >   because the poll_table->_qproc is NULL and it is how epoll works.
> > ~ When there is new data available in step 6, ring_buffer does not know
> >   it has to call ep_poll_callback() because it is not in its wait queue.
> >   Hence, block forever.
> > 
> > Other poll implementation seems to call poll_wait() unconditionally as the 
> > very
> > first thing to do.  For example, tcp_poll() in tcp.c.
> 
> I'm trying to see the effect of this bug, but can't seem to reproduce
> it. Maybe I did something wrong. Attached is a test program I wrote
> trying to follow your instructions. I don't use epoll, so perhaps I
> didn't use it correctly.
> 
> Can you modify it to show me the problem this is trying to fix. That
> is, without this patch it hangs, but with the patch it does not.
> 
> Thanks!
> 
> -- Steve
> 
> > 
> > Signed-off-by: Martin Lau 
> > ---
> >  kernel/trace/ring_buffer.c | 4 
> >  1 file changed, 4 deletions(-)
> > 
> > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> > index fd12cc5..a6e64e8 100644
> > --- a/kernel/trace/ring_buffer.c
> > +++ b/kernel/trace/ring_buffer.c
> > @@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, 
> > int cpu,
> > struct ring_buffer_per_cpu *cpu_buffer;
> > struct rb_irq_work *work;
> >  
> > -   if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
> > -   (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, 
> > cpu)))
> > -   return POLLIN | POLLRDNORM;
> > -
> > if (cpu == RING_BUFFER_ALL_CPUS)
> > work = >irq_work;
> > else {
> 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static const char * debugfs_list[] = {
"/debug/tracing",
"/sys/kernel/debug/tracing",
"/d/tracing",
NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static const char *find_debugfs(void)
{
struct stat st;
int i;
int r;

for (i = 0; debugfs_list[i]; i++) {
r = stat(debugfs_list[i], );
if (r < 0)
continue;
if (S_ISDIR(st.st_mode))
return debugfs_list[i];
}
return NULL;
}

static char * make_path(const char *file)
{
char *path;
int size;

size = strlen(debugfs) + strlen(file) + 2;
path = malloc(size);
if (!path) {
perror("malloc");
exit(-1);
}
sprintf(path, "%s/%s", debugfs, file);
return path;
}

static void mark_write(const char *str)
{
write(markfd, str, strlen(str));
}

static void read_trace_pipe(void)
{
char buf[1024];
int r;

while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
printf("%.*s", r, buf);
}

int main (int argc, char **argv)
{
struct epoll_event ee;
char *marker;
char *pipe;
int efd;
int ret;
pid_t dwrt_pid;

debugfs = find_debugfs();
if (!debugfs) {
fprintf(stderr, "Could not find debugfs\n");
exit(-1);
}

marker = make_path("trace_marker");
pipe = make_path("trace_pipe");

markfd = open(marker, O_WRONLY);
if (markfd < 0) {
perror("marker");
exit(-1);
}
trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
if (trace_pipe_fd < 0) {
perror("trace_pipe");
exit(-1);
}

efd = epoll_create(1);
if (efd < 0) {
perror("epoll_create");
exit(-1);
   

Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-10 Thread Steven Rostedt
On Mon, 9 Jun 2014 23:06:42 -0700
Martin Lau  wrote:

> ring_buffer_poll_wait() should always put the poll_table to its wait_queue
> even there is immediate data available.  Otherwise, the following epoll and
> read sequence will eventually hang forever:
> 
> 1. Put some data to make the trace_pipe ring_buffer read ready first
> 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
> 3. epoll_wait()
> 4. read(trace_pipe_fd) till EAGAIN
> 5. Add some more data to the trace_pipe ring_buffer
> 6. epoll_wait() -> this epoll_wait() will block forever
> 
> ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
>   ring_buffer_poll_wait() returns immediately without adding poll_table,
>   which has poll_table->_qproc pointing to ep_poll_callback(), to its
>   wait_queue.
> ~ During the epoll_wait() call in step 3 and step 6,
>   ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
>   because the poll_table->_qproc is NULL and it is how epoll works.
> ~ When there is new data available in step 6, ring_buffer does not know
>   it has to call ep_poll_callback() because it is not in its wait queue.
>   Hence, block forever.
> 
> Other poll implementation seems to call poll_wait() unconditionally as the 
> very
> first thing to do.  For example, tcp_poll() in tcp.c.

I'm trying to see the effect of this bug, but can't seem to reproduce
it. Maybe I did something wrong. Attached is a test program I wrote
trying to follow your instructions. I don't use epoll, so perhaps I
didn't use it correctly.

Can you modify it to show me the problem this is trying to fix. That
is, without this patch it hangs, but with the patch it does not.

Thanks!

-- Steve

> 
> Signed-off-by: Martin Lau 
> ---
>  kernel/trace/ring_buffer.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index fd12cc5..a6e64e8 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, 
> int cpu,
>   struct ring_buffer_per_cpu *cpu_buffer;
>   struct rb_irq_work *work;
>  
> - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
> - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, 
> cpu)))
> - return POLLIN | POLLRDNORM;
> -
>   if (cpu == RING_BUFFER_ALL_CPUS)
>   work = >irq_work;
>   else {

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static const char * debugfs_list[] = {
	"/debug/tracing",
	"/sys/kernel/debug/tracing",
	"/d/tracing",
	NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static const char *find_debugfs(void)
{
	struct stat st;
	int i;
	int r;

	for (i = 0; debugfs_list[i]; i++) {
		r = stat(debugfs_list[i], );
		if (r < 0)
			continue;
		if (S_ISDIR(st.st_mode))
			return debugfs_list[i];
	}
	return NULL;
}

static char * make_path(const char *file)
{
	char *path;
	int size;

	size = strlen(debugfs) + strlen(file) + 2;
	path = malloc(size);
	if (!path) {
		perror("malloc");
		exit(-1);
	}
	sprintf(path, "%s/%s", debugfs, file);
	return path;
}

static void mark_write(const char *str)
{
	write(markfd, str, strlen(str));
}

static void read_trace_pipe(void)
{
	char buf[1024];
	int r;

	while ((r = read(trace_pipe_fd, buf, 1024)) > 0)
		printf("%.*s", r, buf);

}

int main (int argc, char **argv)
{
	struct epoll_event ee;
	char *marker;
	char *pipe;
	int efd;
	int ret;

	debugfs = find_debugfs();
	if (!debugfs) {
		fprintf(stderr, "Could not find debugfs\n");
		exit(-1);
	}

	marker = make_path("trace_marker");
	pipe = make_path("trace_pipe");

	markfd = open(marker, O_WRONLY);
	if (markfd < 0) {
		perror("marker");
		exit(-1);
	}
	trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
	if (trace_pipe_fd < 0) {
		perror("trace_pipe");
		exit(-1);
	}

	efd = epoll_create(1);
	if (efd < 0) {
		perror("epoll_create");
		exit(-1);
	}

	mark_write("some data");
	ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, );
	if (ret < 0) {
		perror("epoll_ctl");
		exit(-1);
	}
	epoll_wait(efd, , 1, 0);
	read_trace_pipe();
	mark_write("more data");
	epoll_wait(efd, , 1, 0);
	read_trace_pipe();

	return 0;
}


[PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-10 Thread Martin Lau
ring_buffer_poll_wait() should always put the poll_table to its wait_queue
even there is immediate data available.  Otherwise, the following epoll and
read sequence will eventually hang forever:

1. Put some data to make the trace_pipe ring_buffer read ready first
2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
3. epoll_wait()
4. read(trace_pipe_fd) till EAGAIN
5. Add some more data to the trace_pipe ring_buffer
6. epoll_wait() -> this epoll_wait() will block forever

~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
  ring_buffer_poll_wait() returns immediately without adding poll_table,
  which has poll_table->_qproc pointing to ep_poll_callback(), to its
  wait_queue.
~ During the epoll_wait() call in step 3 and step 6,
  ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
  because the poll_table->_qproc is NULL and it is how epoll works.
~ When there is new data available in step 6, ring_buffer does not know
  it has to call ep_poll_callback() because it is not in its wait queue.
  Hence, block forever.

Other poll implementation seems to call poll_wait() unconditionally as the very
first thing to do.  For example, tcp_poll() in tcp.c.

Signed-off-by: Martin Lau 
---
 kernel/trace/ring_buffer.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index fd12cc5..a6e64e8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int 
cpu,
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
 
-   if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
-   (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, 
cpu)))
-   return POLLIN | POLLRDNORM;
-
if (cpu == RING_BUFFER_ALL_CPUS)
work = >irq_work;
else {
-- 
1.8.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-10 Thread Martin Lau
ring_buffer_poll_wait() should always put the poll_table to its wait_queue
even there is immediate data available.  Otherwise, the following epoll and
read sequence will eventually hang forever:

1. Put some data to make the trace_pipe ring_buffer read ready first
2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
3. epoll_wait()
4. read(trace_pipe_fd) till EAGAIN
5. Add some more data to the trace_pipe ring_buffer
6. epoll_wait() - this epoll_wait() will block forever

~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
  ring_buffer_poll_wait() returns immediately without adding poll_table,
  which has poll_table-_qproc pointing to ep_poll_callback(), to its
  wait_queue.
~ During the epoll_wait() call in step 3 and step 6,
  ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
  because the poll_table-_qproc is NULL and it is how epoll works.
~ When there is new data available in step 6, ring_buffer does not know
  it has to call ep_poll_callback() because it is not in its wait queue.
  Hence, block forever.

Other poll implementation seems to call poll_wait() unconditionally as the very
first thing to do.  For example, tcp_poll() in tcp.c.

Signed-off-by: Martin Lau ka...@fb.com
---
 kernel/trace/ring_buffer.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index fd12cc5..a6e64e8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int 
cpu,
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
 
-   if ((cpu == RING_BUFFER_ALL_CPUS  !ring_buffer_empty(buffer)) ||
-   (cpu != RING_BUFFER_ALL_CPUS  !ring_buffer_empty_cpu(buffer, 
cpu)))
-   return POLLIN | POLLRDNORM;
-
if (cpu == RING_BUFFER_ALL_CPUS)
work = buffer-irq_work;
else {
-- 
1.8.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-10 Thread Steven Rostedt
On Mon, 9 Jun 2014 23:06:42 -0700
Martin Lau ka...@fb.com wrote:

 ring_buffer_poll_wait() should always put the poll_table to its wait_queue
 even there is immediate data available.  Otherwise, the following epoll and
 read sequence will eventually hang forever:
 
 1. Put some data to make the trace_pipe ring_buffer read ready first
 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
 3. epoll_wait()
 4. read(trace_pipe_fd) till EAGAIN
 5. Add some more data to the trace_pipe ring_buffer
 6. epoll_wait() - this epoll_wait() will block forever
 
 ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
   ring_buffer_poll_wait() returns immediately without adding poll_table,
   which has poll_table-_qproc pointing to ep_poll_callback(), to its
   wait_queue.
 ~ During the epoll_wait() call in step 3 and step 6,
   ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
   because the poll_table-_qproc is NULL and it is how epoll works.
 ~ When there is new data available in step 6, ring_buffer does not know
   it has to call ep_poll_callback() because it is not in its wait queue.
   Hence, block forever.
 
 Other poll implementation seems to call poll_wait() unconditionally as the 
 very
 first thing to do.  For example, tcp_poll() in tcp.c.

I'm trying to see the effect of this bug, but can't seem to reproduce
it. Maybe I did something wrong. Attached is a test program I wrote
trying to follow your instructions. I don't use epoll, so perhaps I
didn't use it correctly.

Can you modify it to show me the problem this is trying to fix. That
is, without this patch it hangs, but with the patch it does not.

Thanks!

-- Steve

 
 Signed-off-by: Martin Lau ka...@fb.com
 ---
  kernel/trace/ring_buffer.c | 4 
  1 file changed, 4 deletions(-)
 
 diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
 index fd12cc5..a6e64e8 100644
 --- a/kernel/trace/ring_buffer.c
 +++ b/kernel/trace/ring_buffer.c
 @@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, 
 int cpu,
   struct ring_buffer_per_cpu *cpu_buffer;
   struct rb_irq_work *work;
  
 - if ((cpu == RING_BUFFER_ALL_CPUS  !ring_buffer_empty(buffer)) ||
 - (cpu != RING_BUFFER_ALL_CPUS  !ring_buffer_empty_cpu(buffer, 
 cpu)))
 - return POLLIN | POLLRDNORM;
 -
   if (cpu == RING_BUFFER_ALL_CPUS)
   work = buffer-irq_work;
   else {

#include stdio.h
#include stdlib.h
#include string.h
#include sys/types.h
#include sys/stat.h
#include sys/epoll.h
#include fcntl.h
#include unistd.h

static const char * debugfs_list[] = {
	/debug/tracing,
	/sys/kernel/debug/tracing,
	/d/tracing,
	NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static const char *find_debugfs(void)
{
	struct stat st;
	int i;
	int r;

	for (i = 0; debugfs_list[i]; i++) {
		r = stat(debugfs_list[i], st);
		if (r  0)
			continue;
		if (S_ISDIR(st.st_mode))
			return debugfs_list[i];
	}
	return NULL;
}

static char * make_path(const char *file)
{
	char *path;
	int size;

	size = strlen(debugfs) + strlen(file) + 2;
	path = malloc(size);
	if (!path) {
		perror(malloc);
		exit(-1);
	}
	sprintf(path, %s/%s, debugfs, file);
	return path;
}

static void mark_write(const char *str)
{
	write(markfd, str, strlen(str));
}

static void read_trace_pipe(void)
{
	char buf[1024];
	int r;

	while ((r = read(trace_pipe_fd, buf, 1024))  0)
		printf(%.*s, r, buf);

}

int main (int argc, char **argv)
{
	struct epoll_event ee;
	char *marker;
	char *pipe;
	int efd;
	int ret;

	debugfs = find_debugfs();
	if (!debugfs) {
		fprintf(stderr, Could not find debugfs\n);
		exit(-1);
	}

	marker = make_path(trace_marker);
	pipe = make_path(trace_pipe);

	markfd = open(marker, O_WRONLY);
	if (markfd  0) {
		perror(marker);
		exit(-1);
	}
	trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
	if (trace_pipe_fd  0) {
		perror(trace_pipe);
		exit(-1);
	}

	efd = epoll_create(1);
	if (efd  0) {
		perror(epoll_create);
		exit(-1);
	}

	mark_write(some data);
	ret = epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee);
	if (ret  0) {
		perror(epoll_ctl);
		exit(-1);
	}
	epoll_wait(efd, ee, 1, 0);
	read_trace_pipe();
	mark_write(more data);
	epoll_wait(efd, ee, 1, 0);
	read_trace_pipe();

	return 0;
}


Re: [PATCH] ring-buffer: Fix polling on trace_pipe

2014-06-10 Thread Martin Lau
Hi Steve,

Attached is the modified test program.  Here is the sample output:

localhost ~ # ./ftrace-test-epoll-kafai
   ...-1857  [000] ...1   720.174295: tracing_mark_write: some data
1857: waitting for more data..
1858: written more data
block here

Thanks,
--Martin

On Tue, Jun 10, 2014 at 11:49:15AM -0400, Steven Rostedt wrote:
 On Mon, 9 Jun 2014 23:06:42 -0700
 Martin Lau ka...@fb.com wrote:
 
  ring_buffer_poll_wait() should always put the poll_table to its wait_queue
  even there is immediate data available.  Otherwise, the following epoll and
  read sequence will eventually hang forever:
  
  1. Put some data to make the trace_pipe ring_buffer read ready first
  2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee)
  3. epoll_wait()
  4. read(trace_pipe_fd) till EAGAIN
  5. Add some more data to the trace_pipe ring_buffer
  6. epoll_wait() - this epoll_wait() will block forever
  
  ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2,
ring_buffer_poll_wait() returns immediately without adding poll_table,
which has poll_table-_qproc pointing to ep_poll_callback(), to its
wait_queue.
  ~ During the epoll_wait() call in step 3 and step 6,
ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue
because the poll_table-_qproc is NULL and it is how epoll works.
  ~ When there is new data available in step 6, ring_buffer does not know
it has to call ep_poll_callback() because it is not in its wait queue.
Hence, block forever.
  
  Other poll implementation seems to call poll_wait() unconditionally as the 
  very
  first thing to do.  For example, tcp_poll() in tcp.c.
 
 I'm trying to see the effect of this bug, but can't seem to reproduce
 it. Maybe I did something wrong. Attached is a test program I wrote
 trying to follow your instructions. I don't use epoll, so perhaps I
 didn't use it correctly.
 
 Can you modify it to show me the problem this is trying to fix. That
 is, without this patch it hangs, but with the patch it does not.
 
 Thanks!
 
 -- Steve
 
  
  Signed-off-by: Martin Lau ka...@fb.com
  ---
   kernel/trace/ring_buffer.c | 4 
   1 file changed, 4 deletions(-)
  
  diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
  index fd12cc5..a6e64e8 100644
  --- a/kernel/trace/ring_buffer.c
  +++ b/kernel/trace/ring_buffer.c
  @@ -613,10 +613,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, 
  int cpu,
  struct ring_buffer_per_cpu *cpu_buffer;
  struct rb_irq_work *work;
   
  -   if ((cpu == RING_BUFFER_ALL_CPUS  !ring_buffer_empty(buffer)) ||
  -   (cpu != RING_BUFFER_ALL_CPUS  !ring_buffer_empty_cpu(buffer, 
  cpu)))
  -   return POLLIN | POLLRDNORM;
  -
  if (cpu == RING_BUFFER_ALL_CPUS)
  work = buffer-irq_work;
  else {
 

#include stdio.h
#include stdlib.h
#include string.h
#include sys/types.h
#include sys/stat.h
#include sys/epoll.h
#include fcntl.h
#include unistd.h
#include assert.h

static const char * debugfs_list[] = {
/debug/tracing,
/sys/kernel/debug/tracing,
/d/tracing,
NULL,
};

static const char *debugfs;
static int markfd;
static int trace_pipe_fd;

static const char *find_debugfs(void)
{
struct stat st;
int i;
int r;

for (i = 0; debugfs_list[i]; i++) {
r = stat(debugfs_list[i], st);
if (r  0)
continue;
if (S_ISDIR(st.st_mode))
return debugfs_list[i];
}
return NULL;
}

static char * make_path(const char *file)
{
char *path;
int size;

size = strlen(debugfs) + strlen(file) + 2;
path = malloc(size);
if (!path) {
perror(malloc);
exit(-1);
}
sprintf(path, %s/%s, debugfs, file);
return path;
}

static void mark_write(const char *str)
{
write(markfd, str, strlen(str));
}

static void read_trace_pipe(void)
{
char buf[1024];
int r;

while ((r = read(trace_pipe_fd, buf, 1024))  0)
printf(%.*s, r, buf);
}

int main (int argc, char **argv)
{
struct epoll_event ee;
char *marker;
char *pipe;
int efd;
int ret;
pid_t dwrt_pid;

debugfs = find_debugfs();
if (!debugfs) {
fprintf(stderr, Could not find debugfs\n);
exit(-1);
}

marker = make_path(trace_marker);
pipe = make_path(trace_pipe);

markfd = open(marker, O_WRONLY);
if (markfd  0) {
perror(marker);
exit(-1);
}
trace_pipe_fd = open(pipe, O_RDONLY|O_NONBLOCK);
if (trace_pipe_fd  0) {
perror(trace_pipe);
exit(-1);
}

efd = epoll_create(1);
if (efd  0) {
perror(epoll_create);
exit(-1);
}