Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2013-01-13 Thread Stanislav Kinsbursky

Thanks!

11.01.2013 21:20, J. Bruce Fields пишет:

On Fri, Jan 11, 2013 at 12:03:12PM -0500, J. Bruce Fields wrote:

On Fri, Jan 11, 2013 at 06:56:58PM +0400, Stanislav Kinsbursky wrote:

11.12.2012 19:35, J. Bruce Fields пишет:

On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:

On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:

I don't really understand, how  mountd's root can be wrong. I.e.
its' always right as I see it. NFSd kthreads have to swap/use
relative path/whatever to communicate with proper mountd.
Or I'm missing something?


Ugh, I see the problem: I thought svc_export_request was called at the
time mountd does the read, but instead its done at the time nfsd does
the upcall.

I suspect that's wrong, and we really want this done in the context of
the mountd process when it does the read call.  If d_path is called
there then we have no problem.


Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read().  I
think that should be possible.



So, Bruce, what we going to do (or what you want me to do) with the rest of 
NFSd changes?
I.e. how I should solve this d_path() problem?
I.e. I don't understand what did you mean by "I'd be happier if we could modify 
sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read()".
Could you give me a hint?


Definitely.  So normally the way these upcalls happen are:

1. the kernel does a cache lookup, finds no matching item, and
   calls sunrpc_cache_pipe_upcall().
2. sunrpc_cache_pipe_upcall() formats the upcall: it allocates a
   struct cache_request crq and fills crq->buf with the upcall
   data by calling the cache's ->cache_request() method.
3. Then rpc.mountd realizes there's data available in
   /proc/net/rpc/nfsd.fh/content, so it does a read on that file.
4. cache_read copies the formatted upcall from crq->buf to
   to userspace.

So all I'm suggesting is that instead of calling ->cache_request() at
step 2, we do it at step 4.

Then cache_request will be called from rpc.mountd's read.  So we'll know
which container rpc.mountd is in.

Does that make sense?


The following is untested, ugly, and almost certainly insufficient and
wrong, but maybe it's a starting point:

--b.

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 9f84703..f15e4c1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -744,6 +744,7 @@ struct cache_request {
char* buf;
int len;
int readers;
+   void (*cache_request)(struct cache_detail *, struct cache_head *, char 
**, int *);
  };
  struct cache_reader {
struct cache_queue  q;
@@ -785,10 +786,19 @@ static ssize_t cache_read(struct file *filp, char __user 
*buf, size_t count,
spin_unlock(&queue_lock);

if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
+   char *bp;
+   int len = PAGE_SIZE;
+
err = -EAGAIN;
spin_lock(&queue_lock);
list_move(&rp->q.list, &rq->q.list);
spin_unlock(&queue_lock);
+
+   bp = rq->buf;
+   rq->cache_request(cd, rq->item, &bp, &len);
+   if (rq->len < 0)
+   goto out;
+   rq->len = PAGE_SIZE - len;
} else {
if (rp->offset + count > rq->len)
count = rq->len - rp->offset;
@@ -1149,8 +1159,6 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, 
struct cache_head *h,

char *buf;
struct cache_request *crq;
-   char *bp;
-   int len;

if (!cache_listeners_exist(detail)) {
warn_no_listener(detail);
@@ -1167,19 +1175,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail 
*detail, struct cache_head *h,
return -EAGAIN;
}

-   bp = buf; len = PAGE_SIZE;
-
-   cache_request(detail, h, &bp, &len);
-
-   if (len < 0) {
-   kfree(buf);
-   kfree(crq);
-   return -EAGAIN;
-   }
+   crq->cache_request = cache_request;
crq->q.reader = 0;
crq->item = cache_get(h);
crq->buf = buf;
-   crq->len = PAGE_SIZE - len;
crq->readers = 0;
spin_lock(&queue_lock);
list_add_tail(&crq->q.list, &detail->queue);




--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2013-01-13 Thread Stanislav Kinsbursky

11.01.2013 21:03, J. Bruce Fields пишет:

On Fri, Jan 11, 2013 at 06:56:58PM +0400, Stanislav Kinsbursky wrote:

11.12.2012 19:35, J. Bruce Fields пишет:

On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:

On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:

I don't really understand, how  mountd's root can be wrong. I.e.
its' always right as I see it. NFSd kthreads have to swap/use
relative path/whatever to communicate with proper mountd.
Or I'm missing something?


Ugh, I see the problem: I thought svc_export_request was called at the
time mountd does the read, but instead its done at the time nfsd does
the upcall.

I suspect that's wrong, and we really want this done in the context of
the mountd process when it does the read call.  If d_path is called
there then we have no problem.


Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read().  I
think that should be possible.



So, Bruce, what we going to do (or what you want me to do) with the rest of 
NFSd changes?
I.e. how I should solve this d_path() problem?
I.e. I don't understand what did you mean by "I'd be happier if we could modify 
sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read()".
Could you give me a hint?


Definitely.  So normally the way these upcalls happen are:

1. the kernel does a cache lookup, finds no matching item, and
   calls sunrpc_cache_pipe_upcall().
2. sunrpc_cache_pipe_upcall() formats the upcall: it allocates a
   struct cache_request crq and fills crq->buf with the upcall
   data by calling the cache's ->cache_request() method.
3. Then rpc.mountd realizes there's data available in
   /proc/net/rpc/nfsd.fh/content, so it does a read on that file.
4. cache_read copies the formatted upcall from crq->buf to
   to userspace.

So all I'm suggesting is that instead of calling ->cache_request() at
step 2, we do it at step 4.

Then cache_request will be called from rpc.mountd's read.  So we'll know
which container rpc.mountd is in.

Does that make sense?



Ok, now I understand.
If the upcall is just the way to inform rpc.mound, that there are some data to 
read and process, then it your proposing changes should work.
I'll prepare initial patch set.
Thanks!


--b.




--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2013-01-11 Thread J. Bruce Fields
On Fri, Jan 11, 2013 at 12:03:12PM -0500, J. Bruce Fields wrote:
> On Fri, Jan 11, 2013 at 06:56:58PM +0400, Stanislav Kinsbursky wrote:
> > 11.12.2012 19:35, J. Bruce Fields пишет:
> > >On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:
> > >>On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:
> > >>>I don't really understand, how  mountd's root can be wrong. I.e.
> > >>>its' always right as I see it. NFSd kthreads have to swap/use
> > >>>relative path/whatever to communicate with proper mountd.
> > >>>Or I'm missing something?
> > >>
> > >>Ugh, I see the problem: I thought svc_export_request was called at the
> > >>time mountd does the read, but instead its done at the time nfsd does
> > >>the upcall.
> > >>
> > >>I suspect that's wrong, and we really want this done in the context of
> > >>the mountd process when it does the read call.  If d_path is called
> > >>there then we have no problem.
> > >
> > >Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
> > >skip calling cache_request and instead delay that until cache_read().  I
> > >think that should be possible.
> > >
> > 
> > So, Bruce, what we going to do (or what you want me to do) with the rest of 
> > NFSd changes?
> > I.e. how I should solve this d_path() problem?
> > I.e. I don't understand what did you mean by "I'd be happier if we could 
> > modify sunrpc_cache_pipe_upcall to
> > skip calling cache_request and instead delay that until cache_read()".
> > Could you give me a hint?
> 
> Definitely.  So normally the way these upcalls happen are:
> 
>   1. the kernel does a cache lookup, finds no matching item, and
>  calls sunrpc_cache_pipe_upcall().
>   2. sunrpc_cache_pipe_upcall() formats the upcall: it allocates a
>  struct cache_request crq and fills crq->buf with the upcall
>  data by calling the cache's ->cache_request() method.
>   3. Then rpc.mountd realizes there's data available in
>  /proc/net/rpc/nfsd.fh/content, so it does a read on that file.
>   4. cache_read copies the formatted upcall from crq->buf to
>  to userspace.
> 
> So all I'm suggesting is that instead of calling ->cache_request() at
> step 2, we do it at step 4.
> 
> Then cache_request will be called from rpc.mountd's read.  So we'll know
> which container rpc.mountd is in.
> 
> Does that make sense?

The following is untested, ugly, and almost certainly insufficient and
wrong, but maybe it's a starting point:

--b.

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 9f84703..f15e4c1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -744,6 +744,7 @@ struct cache_request {
char* buf;
int len;
int readers;
+   void (*cache_request)(struct cache_detail *, struct cache_head *, char 
**, int *);
 };
 struct cache_reader {
struct cache_queue  q;
@@ -785,10 +786,19 @@ static ssize_t cache_read(struct file *filp, char __user 
*buf, size_t count,
spin_unlock(&queue_lock);
 
if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
+   char *bp;
+   int len = PAGE_SIZE;
+
err = -EAGAIN;
spin_lock(&queue_lock);
list_move(&rp->q.list, &rq->q.list);
spin_unlock(&queue_lock);
+
+   bp = rq->buf;
+   rq->cache_request(cd, rq->item, &bp, &len);
+   if (rq->len < 0)
+   goto out;
+   rq->len = PAGE_SIZE - len;
} else {
if (rp->offset + count > rq->len)
count = rq->len - rp->offset;
@@ -1149,8 +1159,6 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, 
struct cache_head *h,
 
char *buf;
struct cache_request *crq;
-   char *bp;
-   int len;
 
if (!cache_listeners_exist(detail)) {
warn_no_listener(detail);
@@ -1167,19 +1175,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail 
*detail, struct cache_head *h,
return -EAGAIN;
}
 
-   bp = buf; len = PAGE_SIZE;
-
-   cache_request(detail, h, &bp, &len);
-
-   if (len < 0) {
-   kfree(buf);
-   kfree(crq);
-   return -EAGAIN;
-   }
+   crq->cache_request = cache_request;
crq->q.reader = 0;
crq->item = cache_get(h);
crq->buf = buf;
-   crq->len = PAGE_SIZE - len;
crq->readers = 0;
spin_lock(&queue_lock);
list_add_tail(&crq->q.list, &detail->queue);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2013-01-11 Thread J. Bruce Fields
On Fri, Jan 11, 2013 at 06:56:58PM +0400, Stanislav Kinsbursky wrote:
> 11.12.2012 19:35, J. Bruce Fields пишет:
> >On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:
> >>On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:
> >>>I don't really understand, how  mountd's root can be wrong. I.e.
> >>>its' always right as I see it. NFSd kthreads have to swap/use
> >>>relative path/whatever to communicate with proper mountd.
> >>>Or I'm missing something?
> >>
> >>Ugh, I see the problem: I thought svc_export_request was called at the
> >>time mountd does the read, but instead its done at the time nfsd does
> >>the upcall.
> >>
> >>I suspect that's wrong, and we really want this done in the context of
> >>the mountd process when it does the read call.  If d_path is called
> >>there then we have no problem.
> >
> >Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
> >skip calling cache_request and instead delay that until cache_read().  I
> >think that should be possible.
> >
> 
> So, Bruce, what we going to do (or what you want me to do) with the rest of 
> NFSd changes?
> I.e. how I should solve this d_path() problem?
> I.e. I don't understand what did you mean by "I'd be happier if we could 
> modify sunrpc_cache_pipe_upcall to
> skip calling cache_request and instead delay that until cache_read()".
> Could you give me a hint?

Definitely.  So normally the way these upcalls happen are:

1. the kernel does a cache lookup, finds no matching item, and
   calls sunrpc_cache_pipe_upcall().
2. sunrpc_cache_pipe_upcall() formats the upcall: it allocates a
   struct cache_request crq and fills crq->buf with the upcall
   data by calling the cache's ->cache_request() method.
3. Then rpc.mountd realizes there's data available in
   /proc/net/rpc/nfsd.fh/content, so it does a read on that file.
4. cache_read copies the formatted upcall from crq->buf to
   to userspace.

So all I'm suggesting is that instead of calling ->cache_request() at
step 2, we do it at step 4.

Then cache_request will be called from rpc.mountd's read.  So we'll know
which container rpc.mountd is in.

Does that make sense?

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2013-01-11 Thread Stanislav Kinsbursky

11.12.2012 19:35, J. Bruce Fields пишет:

On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:

On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:

I don't really understand, how  mountd's root can be wrong. I.e.
its' always right as I see it. NFSd kthreads have to swap/use
relative path/whatever to communicate with proper mountd.
Or I'm missing something?


Ugh, I see the problem: I thought svc_export_request was called at the
time mountd does the read, but instead its done at the time nfsd does
the upcall.

I suspect that's wrong, and we really want this done in the context of
the mountd process when it does the read call.  If d_path is called
there then we have no problem.


Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read().  I
think that should be possible.



So, Bruce, what we going to do (or what you want me to do) with the rest of 
NFSd changes?
I.e. how I should solve this d_path() problem?
I.e. I don't understand what did you mean by "I'd be happier if we could modify 
sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read()".
Could you give me a hint?


--b.




--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread Stanislav Kinsbursky

11.12.2012 19:35, J. Bruce Fields пишет:

On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:

On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:

I don't really understand, how  mountd's root can be wrong. I.e.
its' always right as I see it. NFSd kthreads have to swap/use
relative path/whatever to communicate with proper mountd.
Or I'm missing something?


Ugh, I see the problem: I thought svc_export_request was called at the
time mountd does the read, but instead its done at the time nfsd does
the upcall.

I suspect that's wrong, and we really want this done in the context of
the mountd process when it does the read call.  If d_path is called
there then we have no problem.


Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read().  I
think that should be possible.



Hmmm...
I'm not familiar with mountd to kernel exchange protocol.
It looks like kernel passes that path string to mountd on upcall.


--b.




--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread J. Bruce Fields
On Tue, Dec 11, 2012 at 10:20:36AM -0500, J. Bruce Fields wrote:
> On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:
> > I don't really understand, how  mountd's root can be wrong. I.e.
> > its' always right as I see it. NFSd kthreads have to swap/use
> > relative path/whatever to communicate with proper mountd.
> > Or I'm missing something?
> 
> Ugh, I see the problem: I thought svc_export_request was called at the
> time mountd does the read, but instead its done at the time nfsd does
> the upcall.
> 
> I suspect that's wrong, and we really want this done in the context of
> the mountd process when it does the read call.  If d_path is called
> there then we have no problem.

Right, so I'd be happier if we could modify sunrpc_cache_pipe_upcall to
skip calling cache_request and instead delay that until cache_read().  I
think that should be possible.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread J. Bruce Fields
On Tue, Dec 11, 2012 at 07:07:00PM +0400, Stanislav Kinsbursky wrote:
> 11.12.2012 18:56, J. Bruce Fields пишет:
> >On Tue, Dec 11, 2012 at 06:12:40PM +0400, Stanislav Kinsbursky wrote:
> >>UID: 9899
> >>
> >>11.12.2012 18:00, Stanislav Kinsbursky пишет:
> >>>11.12.2012 00:28, J. Bruce Fields пишет:
> On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote:
> >NFSd does lookup. Lookup is done starting from current->fs->root.
> >NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely
> >unshared) root.
> >So we have to swap root to those, which process, started NFSd, has. 
> >Because
> >that process can be in a container with it's own root.
> 
> This doesn't sound right to me.
> 
> Which lookups exactly do you see being done relative to
> current->fs->root ?
> 
> >>>
> >>>Ok, you are right. I was mistaken here.
> >>>This is not a exactly lookup, but d_path() problem in svc_export_request().
> >>>I.e. without root swapping, d_path() will give not local export path (like 
> >>>"/export")
> >>>but something like this "/root/containers_root/export".
> >>>
> >>
> >>We, actually, can do it less in less aggressive way.
> >>I.e. instead root swap and current svc_export_request() implementation:
> >>
> >>void svc_export_request(...)
> >>{
> >>
> >> pth = d_path(&exp->ex_path, *bpp, *blen);
> >>
> >>}
> >>
> >>we can do something like this:
> >>
> >>void svc_export_request(...)
> >>{
> >>struct nfsd_net *nn = ...
> >>
> >>spin_lock(&dcache_lock);
> >> pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen);
> >>spin_unlock(&dcache_lock);
> >>
> >>}
> >
> >That looks simpler, but I still don't understand why we need it.
> >
> >I'm confused about how d_path works; I would have thought that
> >filesystem namespaces would have their own vfsmount trees and hence that
> >the (vfsmount, dentry) would be enough to specify the path.  Is the root
> >argument for the case of chroot?  Do we care about that?
> >
> 
> It works very simple: just traverse the tree from specified dentry up to 
> current->fs->root.dentry.
> Having container in some fully separated mount point is great, of course. But:
> 1) this is a limitation we really want to avoid. I.e. container can be 
> chrooted into some path like "/root/containers_root/" as in example above.
> 2) NFSd kthread works in init root environment. But we anyway want to get 
> proper path string in container root, but not in kthreads root.
> 
> >Also, svc_export_request is called from mountd's read of
> >/proc/net/rpc/nfsd.export/channel.  If mountd's root is wrong, then
> >nothing's going to work anyway.
> >
> 
> I don't really understand, how  mountd's root can be wrong. I.e.
> its' always right as I see it. NFSd kthreads have to swap/use
> relative path/whatever to communicate with proper mountd.
> Or I'm missing something?

Ugh, I see the problem: I thought svc_export_request was called at the
time mountd does the read, but instead its done at the time nfsd does
the upcall.

I suspect that's wrong, and we really want this done in the context of
the mountd process when it does the read call.  If d_path is called
there then we have no problem.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread Stanislav Kinsbursky

11.12.2012 18:56, J. Bruce Fields пишет:

On Tue, Dec 11, 2012 at 06:12:40PM +0400, Stanislav Kinsbursky wrote:

UID: 9899

11.12.2012 18:00, Stanislav Kinsbursky пишет:

11.12.2012 00:28, J. Bruce Fields пишет:

On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote:

NFSd does lookup. Lookup is done starting from current->fs->root.
NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely
unshared) root.
So we have to swap root to those, which process, started NFSd, has. Because
that process can be in a container with it's own root.


This doesn't sound right to me.

Which lookups exactly do you see being done relative to
current->fs->root ?



Ok, you are right. I was mistaken here.
This is not a exactly lookup, but d_path() problem in svc_export_request().
I.e. without root swapping, d_path() will give not local export path (like 
"/export")
but something like this "/root/containers_root/export".



We, actually, can do it less in less aggressive way.
I.e. instead root swap and current svc_export_request() implementation:

void svc_export_request(...)
{

 pth = d_path(&exp->ex_path, *bpp, *blen);

}

we can do something like this:

void svc_export_request(...)
{
struct nfsd_net *nn = ...

spin_lock(&dcache_lock);
 pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen);
spin_unlock(&dcache_lock);

}


That looks simpler, but I still don't understand why we need it.

I'm confused about how d_path works; I would have thought that
filesystem namespaces would have their own vfsmount trees and hence that
the (vfsmount, dentry) would be enough to specify the path.  Is the root
argument for the case of chroot?  Do we care about that?



It works very simple: just traverse the tree from specified dentry up to 
current->fs->root.dentry.
Having container in some fully separated mount point is great, of course. But:
1) this is a limitation we really want to avoid. I.e. container can be chrooted into some 
path like "/root/containers_root/" as in example above.
2) NFSd kthread works in init root environment. But we anyway want to get 
proper path string in container root, but not in kthreads root.


Also, svc_export_request is called from mountd's read of
/proc/net/rpc/nfsd.export/channel.  If mountd's root is wrong, then
nothing's going to work anyway.



I don't really understand, how  mountd's root can be wrong. I.e. its' always right as I see it. NFSd kthreads have to swap/use relative path/whatever to 
communicate with proper mountd.

Or I'm missing something?


--b.




--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread Al Viro
On Tue, Dec 11, 2012 at 09:56:21AM -0500, J. Bruce Fields wrote:

> That looks simpler, but I still don't understand why we need it.
> 
> I'm confused about how d_path works; I would have thought that
> filesystem namespaces would have their own vfsmount trees and hence that
> the (vfsmount, dentry) would be enough to specify the path.  Is the root
> argument for the case of chroot?  Do we care about that?

__d_path() is relative pathname from here to there.  Whether (and what for)
is it wanted in case of nfsd patches is a separate question...

> Also, svc_export_request is called from mountd's read of
> /proc/net/rpc/nfsd.export/channel.  If mountd's root is wrong, then
> nothing's going to work anyway.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread J. Bruce Fields
On Tue, Dec 11, 2012 at 06:12:40PM +0400, Stanislav Kinsbursky wrote:
> UID: 9899
> 
> 11.12.2012 18:00, Stanislav Kinsbursky пишет:
> >11.12.2012 00:28, J. Bruce Fields пишет:
> >>On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote:
> >>>NFSd does lookup. Lookup is done starting from current->fs->root.
> >>>NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely
> >>>unshared) root.
> >>>So we have to swap root to those, which process, started NFSd, has. Because
> >>>that process can be in a container with it's own root.
> >>
> >>This doesn't sound right to me.
> >>
> >>Which lookups exactly do you see being done relative to
> >>current->fs->root ?
> >>
> >
> >Ok, you are right. I was mistaken here.
> >This is not a exactly lookup, but d_path() problem in svc_export_request().
> >I.e. without root swapping, d_path() will give not local export path (like 
> >"/export")
> >but something like this "/root/containers_root/export".
> >
> 
> We, actually, can do it less in less aggressive way.
> I.e. instead root swap and current svc_export_request() implementation:
> 
> void svc_export_request(...)
> {
>   
> pth = d_path(&exp->ex_path, *bpp, *blen);
>   
> }
> 
> we can do something like this:
> 
> void svc_export_request(...)
> {
>   struct nfsd_net *nn = ...
>   
>   spin_lock(&dcache_lock);
> pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen);
>   spin_unlock(&dcache_lock);
>   
> }

That looks simpler, but I still don't understand why we need it.

I'm confused about how d_path works; I would have thought that
filesystem namespaces would have their own vfsmount trees and hence that
the (vfsmount, dentry) would be enough to specify the path.  Is the root
argument for the case of chroot?  Do we care about that?

Also, svc_export_request is called from mountd's read of
/proc/net/rpc/nfsd.export/channel.  If mountd's root is wrong, then
nothing's going to work anyway.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread Stanislav Kinsbursky

11.12.2012 18:12, Stanislav Kinsbursky пишет:

11.12.2012 18:00, Stanislav Kinsbursky пишет:

11.12.2012 00:28, J. Bruce Fields пишет:

On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote:

NFSd does lookup. Lookup is done starting from current->fs->root.
NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely
unshared) root.
So we have to swap root to those, which process, started NFSd, has. Because
that process can be in a container with it's own root.


This doesn't sound right to me.

Which lookups exactly do you see being done relative to
current->fs->root ?



Ok, you are right. I was mistaken here.
This is not a exactly lookup, but d_path() problem in svc_export_request().
I.e. without root swapping, d_path() will give not local export path (like 
"/export")
but something like this "/root/containers_root/export".



We, actually, can do it less in less aggressive way.
I.e. instead root swap and current svc_export_request() implementation:

void svc_export_request(...)
{
 
 pth = d_path(&exp->ex_path, *bpp, *blen);
 
}

we can do something like this:

void svc_export_request(...)
{
 struct nfsd_net *nn = ...
 
 spin_lock(&dcache_lock);
 pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen);
 spin_unlock(&dcache_lock);
 
}



No, this won't work. Sorry for noise.


--b.



Signed-off-by: Stanislav Kinsbursky 
---
  fs/nfsd/netns.h  |1 +
  fs/nfsd/nfssvc.c |   33 -
  2 files changed, 33 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index abfc97c..5c423c6 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -101,6 +101,7 @@ struct nfsd_net {
  struct timeval nfssvc_boot;

  struct svc_serv *nfsd_serv;
+struct pathroot;
  };

  extern int nfsd_net_id;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cee62ab..177bb60 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net)

  set_max_drc();
  do_gettimeofday(&nn->nfssvc_boot);/* record boot time */
+get_fs_root(current->fs, &nn->root);
  return 0;
  }

@@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net)
  if (destroy)
  svc_shutdown_net(nn->nfsd_serv, net);
  svc_destroy(nn->nfsd_serv);
-if (destroy)
+if (destroy) {
+path_put(&nn->root);
  nn->nfsd_serv = NULL;
+}
  }

  int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
@@ -533,6 +536,25 @@ out:
  return error;
  }

+/*
+ * This function is actually slightly modified set_fs_root()
+ */
+static void nfsd_swap_root(struct net *net)
+{
+struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+struct fs_struct *fs = current->fs;
+struct path old_root;
+
+path_get(&nn->root);
+spin_lock(&fs->lock);
+write_seqcount_begin(&fs->seq);
+old_root = fs->root;
+fs->root = nn->root;
+write_seqcount_end(&fs->seq);
+spin_unlock(&fs->lock);
+if (old_root.dentry)
+path_put(&old_root);
+}

  /*
   * This is the NFS server kernel thread
@@ -559,6 +581,15 @@ nfsd(void *vrqstp)
  current->fs->umask = 0;

  /*
+ * We have to swap NFSd kthread's fs->root.
+ * Why so? Because NFSd can be started in container, which has it's own
+ * root.
+ * And so what? NFSd lookup files, and lookup start from
+ * current->fs->root.
+ */
+nfsd_swap_root(net);
+
+/*
   * thread is spawned with all signals set to SIG_IGN, re-enable
   * the ones that will bring down the thread
   */










--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Devel] [PATCH 2/6] nfsd: swap fs root in NFSd kthreads

2012-12-11 Thread Stanislav Kinsbursky

11.12.2012 18:00, Stanislav Kinsbursky пишет:

11.12.2012 00:28, J. Bruce Fields пишет:

On Thu, Dec 06, 2012 at 06:34:47PM +0300, Stanislav Kinsbursky wrote:

NFSd does lookup. Lookup is done starting from current->fs->root.
NFSd is a kthread, cloned by kthreadd, and thus have global (but luckely
unshared) root.
So we have to swap root to those, which process, started NFSd, has. Because
that process can be in a container with it's own root.


This doesn't sound right to me.

Which lookups exactly do you see being done relative to
current->fs->root ?



Ok, you are right. I was mistaken here.
This is not a exactly lookup, but d_path() problem in svc_export_request().
I.e. without root swapping, d_path() will give not local export path (like 
"/export")
but something like this "/root/containers_root/export".



We, actually, can do it less in less aggressive way.
I.e. instead root swap and current svc_export_request() implementation:

void svc_export_request(...)
{

pth = d_path(&exp->ex_path, *bpp, *blen);

}

we can do something like this:

void svc_export_request(...)
{
struct nfsd_net *nn = ...

spin_lock(&dcache_lock);
pth = __d_path(&exp->ex_path, &nn->root, *bpp, *blen);
spin_unlock(&dcache_lock);

}


--b.



Signed-off-by: Stanislav Kinsbursky 
---
  fs/nfsd/netns.h  |1 +
  fs/nfsd/nfssvc.c |   33 -
  2 files changed, 33 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index abfc97c..5c423c6 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -101,6 +101,7 @@ struct nfsd_net {
  struct timeval nfssvc_boot;

  struct svc_serv *nfsd_serv;
+struct pathroot;
  };

  extern int nfsd_net_id;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cee62ab..177bb60 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -392,6 +392,7 @@ int nfsd_create_serv(struct net *net)

  set_max_drc();
  do_gettimeofday(&nn->nfssvc_boot);/* record boot time */
+get_fs_root(current->fs, &nn->root);
  return 0;
  }

@@ -426,8 +427,10 @@ void nfsd_destroy(struct net *net)
  if (destroy)
  svc_shutdown_net(nn->nfsd_serv, net);
  svc_destroy(nn->nfsd_serv);
-if (destroy)
+if (destroy) {
+path_put(&nn->root);
  nn->nfsd_serv = NULL;
+}
  }

  int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
@@ -533,6 +536,25 @@ out:
  return error;
  }

+/*
+ * This function is actually slightly modified set_fs_root()
+ */
+static void nfsd_swap_root(struct net *net)
+{
+struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+struct fs_struct *fs = current->fs;
+struct path old_root;
+
+path_get(&nn->root);
+spin_lock(&fs->lock);
+write_seqcount_begin(&fs->seq);
+old_root = fs->root;
+fs->root = nn->root;
+write_seqcount_end(&fs->seq);
+spin_unlock(&fs->lock);
+if (old_root.dentry)
+path_put(&old_root);
+}

  /*
   * This is the NFS server kernel thread
@@ -559,6 +581,15 @@ nfsd(void *vrqstp)
  current->fs->umask = 0;

  /*
+ * We have to swap NFSd kthread's fs->root.
+ * Why so? Because NFSd can be started in container, which has it's own
+ * root.
+ * And so what? NFSd lookup files, and lookup start from
+ * current->fs->root.
+ */
+nfsd_swap_root(net);
+
+/*
   * thread is spawned with all signals set to SIG_IGN, re-enable
   * the ones that will bring down the thread
   */







--
Best regards,
Stanislav Kinsbursky
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/