Re: [PATCH v3 26/41] vhost: virtio 1.0 endian-ness support
Hi Michael, Do you have a tree from where I could pull these patches ? Thanks, C. On 11/24/2014 12:54 PM, Michael S. Tsirkin wrote: > Signed-off-by: Michael S. Tsirkin > --- > drivers/vhost/vhost.c | 93 > +++ > 1 file changed, 56 insertions(+), 37 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index c90f437..4d379ed 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -33,8 +33,8 @@ enum { > VHOST_MEMORY_F_LOG = 0x1, > }; > > -#define vhost_used_event(vq) ((u16 __user *)>avail->ring[vq->num]) > -#define vhost_avail_event(vq) ((u16 __user *)>used->ring[vq->num]) > +#define vhost_used_event(vq) ((__virtio16 __user *)>avail->ring[vq->num]) > +#define vhost_avail_event(vq) ((__virtio16 __user *)>used->ring[vq->num]) > > static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, > poll_table *pt) > @@ -1001,7 +1001,7 @@ EXPORT_SYMBOL_GPL(vhost_log_write); > static int vhost_update_used_flags(struct vhost_virtqueue *vq) > { > void __user *used; > - if (__put_user(vq->used_flags, >used->flags) < 0) > + if (__put_user(cpu_to_vhost16(vq, vq->used_flags), >used->flags) < > 0) > return -EFAULT; > if (unlikely(vq->log_used)) { > /* Make sure the flag is seen before log. 
*/ > @@ -1019,7 +1019,7 @@ static int vhost_update_used_flags(struct > vhost_virtqueue *vq) > > static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 > avail_event) > { > - if (__put_user(vq->avail_idx, vhost_avail_event(vq))) > + if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), > vhost_avail_event(vq))) > return -EFAULT; > if (unlikely(vq->log_used)) { > void __user *used; > @@ -1038,6 +1038,7 @@ static int vhost_update_avail_event(struct > vhost_virtqueue *vq, u16 avail_event) > > int vhost_init_used(struct vhost_virtqueue *vq) > { > + __virtio16 last_used_idx; > int r; > if (!vq->private_data) > return 0; > @@ -1046,7 +1047,13 @@ int vhost_init_used(struct vhost_virtqueue *vq) > if (r) > return r; > vq->signalled_used_valid = false; > - return get_user(vq->last_used_idx, >used->idx); > + if (!access_ok(VERIFY_READ, >used->idx, sizeof vq->used->idx)) > + return -EFAULT; > + r = __get_user(last_used_idx, >used->idx); > + if (r) > + return r; > + vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); > + return 0; > } > EXPORT_SYMBOL_GPL(vhost_init_used); > > @@ -1087,16 +1094,16 @@ static int translate_desc(struct vhost_virtqueue *vq, > u64 addr, u32 len, > /* Each buffer in the virtqueues is actually a chain of descriptors. This > * function returns the next descriptor in the chain, > * or -1U if we're at the end. */ > -static unsigned next_desc(struct vring_desc *desc) > +static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc > *desc) > { > unsigned int next; > > /* If this descriptor says it doesn't chain, we're done. */ > - if (!(desc->flags & VRING_DESC_F_NEXT)) > + if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) > return -1U; > > /* Check they're not leading us off end of descriptors. */ > - next = desc->next; > + next = vhost16_to_cpu(vq, desc->next); > /* Make sure compiler knows to grab that: we don't want it changing! 
*/ > /* We will use the result as an index in an array, so most >* architectures only need a compiler barrier here. */ > @@ -1113,18 +1120,19 @@ static int get_indirect(struct vhost_virtqueue *vq, > { > struct vring_desc desc; > unsigned int i = 0, count, found = 0; > + u32 len = vhost32_to_cpu(vq, indirect->len); > int ret; > > /* Sanity check */ > - if (unlikely(indirect->len % sizeof desc)) { > + if (unlikely(len % sizeof desc)) { > vq_err(vq, "Invalid length in indirect descriptor: " > "len 0x%llx not multiple of 0x%zx\n", > -(unsigned long long)indirect->len, > +(unsigned long long)vhost32_to_cpu(vq, indirect->len), > sizeof desc); > return -EINVAL; > } > > - ret = translate_desc(vq, indirect->addr, indirect->len, vq->indirect, > + ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, > vq->indirect, >UIO_MAXIOV); > if (unlikely(ret < 0)) { > vq_err(vq, "Translation failure %d in indirect.\n", ret); > @@ -1135,7 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq, >* architectures only need a compiler barrier here. */ > read_barrier_depends(); > > - count = indirect->len / sizeof desc; > + count = len / sizeof desc; > /* Buffers are chained via a 16 bit next field, so >* we
Re: [PATCH v3 26/41] vhost: virtio 1.0 endian-ness support
Hi Michael, Do you have a tree from where I could pull these patches ? Thanks, C. On 11/24/2014 12:54 PM, Michael S. Tsirkin wrote: Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/vhost/vhost.c | 93 +++ 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c90f437..4d379ed 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -33,8 +33,8 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; -#define vhost_used_event(vq) ((u16 __user *)vq-avail-ring[vq-num]) -#define vhost_avail_event(vq) ((u16 __user *)vq-used-ring[vq-num]) +#define vhost_used_event(vq) ((__virtio16 __user *)vq-avail-ring[vq-num]) +#define vhost_avail_event(vq) ((__virtio16 __user *)vq-used-ring[vq-num]) static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) @@ -1001,7 +1001,7 @@ EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; - if (__put_user(vq-used_flags, vq-used-flags) 0) + if (__put_user(cpu_to_vhost16(vq, vq-used_flags), vq-used-flags) 0) return -EFAULT; if (unlikely(vq-log_used)) { /* Make sure the flag is seen before log. 
*/ @@ -1019,7 +1019,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) { - if (__put_user(vq-avail_idx, vhost_avail_event(vq))) + if (__put_user(cpu_to_vhost16(vq, vq-avail_idx), vhost_avail_event(vq))) return -EFAULT; if (unlikely(vq-log_used)) { void __user *used; @@ -1038,6 +1038,7 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) int vhost_init_used(struct vhost_virtqueue *vq) { + __virtio16 last_used_idx; int r; if (!vq-private_data) return 0; @@ -1046,7 +1047,13 @@ int vhost_init_used(struct vhost_virtqueue *vq) if (r) return r; vq-signalled_used_valid = false; - return get_user(vq-last_used_idx, vq-used-idx); + if (!access_ok(VERIFY_READ, vq-used-idx, sizeof vq-used-idx)) + return -EFAULT; + r = __get_user(last_used_idx, vq-used-idx); + if (r) + return r; + vq-last_used_idx = vhost16_to_cpu(vq, last_used_idx); + return 0; } EXPORT_SYMBOL_GPL(vhost_init_used); @@ -1087,16 +1094,16 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, /* Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, * or -1U if we're at the end. */ -static unsigned next_desc(struct vring_desc *desc) +static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ - if (!(desc-flags VRING_DESC_F_NEXT)) + if (!(desc-flags cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) return -1U; /* Check they're not leading us off end of descriptors. */ - next = desc-next; + next = vhost16_to_cpu(vq, desc-next); /* Make sure compiler knows to grab that: we don't want it changing! */ /* We will use the result as an index in an array, so most * architectures only need a compiler barrier here. 
*/ @@ -1113,18 +1120,19 @@ static int get_indirect(struct vhost_virtqueue *vq, { struct vring_desc desc; unsigned int i = 0, count, found = 0; + u32 len = vhost32_to_cpu(vq, indirect-len); int ret; /* Sanity check */ - if (unlikely(indirect-len % sizeof desc)) { + if (unlikely(len % sizeof desc)) { vq_err(vq, Invalid length in indirect descriptor: len 0x%llx not multiple of 0x%zx\n, -(unsigned long long)indirect-len, +(unsigned long long)vhost32_to_cpu(vq, indirect-len), sizeof desc); return -EINVAL; } - ret = translate_desc(vq, indirect-addr, indirect-len, vq-indirect, + ret = translate_desc(vq, vhost64_to_cpu(vq, indirect-addr), len, vq-indirect, UIO_MAXIOV); if (unlikely(ret 0)) { vq_err(vq, Translation failure %d in indirect.\n, ret); @@ -1135,7 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq, * architectures only need a compiler barrier here. */ read_barrier_depends(); - count = indirect-len / sizeof desc; + count = len / sizeof desc; /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ if (unlikely(count USHRT_MAX + 1)) { @@ -1155,16 +1163,17 @@ static int get_indirect(struct
Re: [PATCH] offb: Fix little-endian support
On 05/14/2014 04:24 PM, Takashi Iwai wrote: > At Wed, 14 May 2014 16:01:17 +0200, > Cedric Le Goater wrote: >> >> Hi Iwai-san, >> >> On 05/14/2014 03:21 PM, Takashi Iwai wrote: >>> Although the color palette was corrected for little endian by the >>> commit [e1edf18b: offb: Add palette hack for little endian], the >>> graphics mode is still shown in psychedelic colors. >> >> Are you referring to the linux logo colors ? If so, could you please >> try the patch below, it should be a fix. > > Not only penguin logo but the whole X graphics got strange colors, > too, according to the bug report. I put the original reporter/tester > (Dinar Valeev) to Cc. > I'm merely a person who tries to fix this mess ;) > > BTW, did you try to run X on fbdev? Not at the time, I was working on the console only, BE and LE. I just tried fbdev and indeed this is a psychedelic mess :) Your fix also has issues on BE and console and the patch of mine below is of no use for fbdev. Damn, this is a nightmare. C. >>> For fixing this >>> properly, we rather need to correct the RGB offsets depending on >>> endianness. >>> >>> Since the RGB base offsets are corrected, we don't need the hack for >>> palette color entries. This patch reverts that, too. >> >> Are you testing using qemu -vga std -vnc :x ? If so, did you try changing >> the depth to 8,15,16,32 ? > > Yes, it was with qemu -vga std -vnc :x. > About different color depths, Dinar can test / clarify better, I > suppose. > >> I think the patch might be breaking big endian >> too. > > Big endian should work as is because my patch uses the original > offsets when fb_be_math() is true. It corrects the RGB offsets if > !fb_be_math(). > > But, I'm also not entirely sure whether this is 100% correct, either. > Namely, if the RGB offsets were correct for some little endian > machines with offb, my patch would break it, of course. But, then > your previous fix must have already broken it as well, so I took the > same fb_be_math() check. 
> > > thanks, > > Takashi > >> Now, I am far from being an expert on frame buffers. It would be glad >> to have some insights on that topic. >> >> Thanks, >> >> C. >> >> >> [PATCH] fb: fix logo palette entries for little endian >> >> The offb_cmap_byteswap() routine helps byteswapping the color map >> entries when required. This patch externalizes and renames the helper >> routine to adjust the pseudo palette of the logo when running on >> little endian. >> >> Signed-off-by: Cédric Le Goater >> --- >> drivers/video/fbmem.c |6 -- >> drivers/video/offb.c | 11 +-- >> include/linux/fb.h|8 >> 3 files changed, 13 insertions(+), 12 deletions(-) >> >> Index: linux.git/drivers/video/fbmem.c >> === >> --- linux.git.orig/drivers/video/fbmem.c >> +++ linux.git/drivers/video/fbmem.c >> @@ -252,7 +252,8 @@ static void fb_set_logo_truepalette(str >> blueshift = info->var.blue.offset - (8 - info->var.blue.length); >> >> for ( i = 0; i < logo->clutsize; i++) { >> -palette[i+32] = (safe_shift((clut[0] & redmask), redshift) | >> +palette[i+32] = fb_cmap_byteswap(info, >> + safe_shift((clut[0] & redmask), redshift) | >> safe_shift((clut[1] & greenmask), greenshift) | >> safe_shift((clut[2] & bluemask), blueshift)); >> clut += 3; >> @@ -271,7 +272,8 @@ static void fb_set_logo_directpalette(st >> blueshift = info->var.blue.offset; >> >> for (i = 32; i < 32 + logo->clutsize; i++) >> -palette[i] = i << redshift | i << greenshift | i << blueshift; >> +palette[i] = fb_cmap_byteswap(info, i << redshift | >> +i << greenshift | i << blueshift); >> } >> >> static void fb_set_logo(struct fb_info *info, >> Index: linux.git/drivers/video/offb.c >> === >> --- linux.git.orig/drivers/video/offb.c >> +++ linux.git/drivers/video/offb.c >> @@ -91,15 +91,6 @@ extern boot_infos_t *boot_infos; >> #define AVIVO_DC_LUTB_WHITE_OFFSET_GREEN0x6cd4 >> #define AVIVO_DC_LUTB_WHITE_OFFSET_RED 0x6cd8 >> >> -#define FB_RIGHT_POS(p, bpp)
Re: [PATCH] offb: Fix little-endian support
Hi Iwai-san, On 05/14/2014 03:21 PM, Takashi Iwai wrote: > Although the color palette was corrected for little endian by the > commit [e1edf18b: offb: Add palette hack for little endian], the > graphics mode is still shown in psychedelic colors. Are you referring to the linux logo colors ? If so, could you please try the patch below, it should be a fix. > For fixing this > properly, we rather need to correct the RGB offsets depending on > endianness. > > Since the RGB base offsets are corrected, we don't need the hack for > palette color entries. This patch reverts that, too. Are you testing using qemu -vga std -vnc :x ? If so, did you try changing the depth to 8,15,16,32 ? I think the patch might be breaking big endian too. Now, I am far from being an expert on frame buffers. I would be glad to have some insights on that topic. Thanks, C. [PATCH] fb: fix logo palette entries for little endian The offb_cmap_byteswap() routine helps byteswapping the color map entries when required. This patch externalizes and renames the helper routine to adjust the pseudo palette of the logo when running on little endian. 
Signed-off-by: Cédric Le Goater --- drivers/video/fbmem.c |6 -- drivers/video/offb.c | 11 +-- include/linux/fb.h|8 3 files changed, 13 insertions(+), 12 deletions(-) Index: linux.git/drivers/video/fbmem.c === --- linux.git.orig/drivers/video/fbmem.c +++ linux.git/drivers/video/fbmem.c @@ -252,7 +252,8 @@ static void fb_set_logo_truepalette(str blueshift = info->var.blue.offset - (8 - info->var.blue.length); for ( i = 0; i < logo->clutsize; i++) { - palette[i+32] = (safe_shift((clut[0] & redmask), redshift) | + palette[i+32] = fb_cmap_byteswap(info, +safe_shift((clut[0] & redmask), redshift) | safe_shift((clut[1] & greenmask), greenshift) | safe_shift((clut[2] & bluemask), blueshift)); clut += 3; @@ -271,7 +272,8 @@ static void fb_set_logo_directpalette(st blueshift = info->var.blue.offset; for (i = 32; i < 32 + logo->clutsize; i++) - palette[i] = i << redshift | i << greenshift | i << blueshift; + palette[i] = fb_cmap_byteswap(info, i << redshift | + i << greenshift | i << blueshift); } static void fb_set_logo(struct fb_info *info, Index: linux.git/drivers/video/offb.c === --- linux.git.orig/drivers/video/offb.c +++ linux.git/drivers/video/offb.c @@ -91,15 +91,6 @@ extern boot_infos_t *boot_infos; #define AVIVO_DC_LUTB_WHITE_OFFSET_GREEN0x6cd4 #define AVIVO_DC_LUTB_WHITE_OFFSET_RED 0x6cd8 -#define FB_RIGHT_POS(p, bpp) (fb_be_math(p) ? 0 : (32 - (bpp))) - -static inline u32 offb_cmap_byteswap(struct fb_info *info, u32 value) -{ - u32 bpp = info->var.bits_per_pixel; - - return cpu_to_be32(value) >> FB_RIGHT_POS(info, bpp); -} - /* * Set a single color register. 
The values supplied are already * rounded down to the hardware's capabilities (according to the @@ -129,7 +120,7 @@ static int offb_setcolreg(u_int regno, u mask <<= info->var.transp.offset; value |= mask; } - pal[regno] = offb_cmap_byteswap(info, value); + pal[regno] = fb_cmap_byteswap(info, value); return 0; } Index: linux.git/include/linux/fb.h === --- linux.git.orig/include/linux/fb.h +++ linux.git/include/linux/fb.h @@ -582,6 +582,7 @@ static inline struct apertures_struct *a #endif +#define FB_RIGHT_POS(p, bpp) (fb_be_math(p) ? 0 : (32 - (bpp))) #define FB_LEFT_POS(p, bpp) (fb_be_math(p) ? (32 - (bpp)) : 0) #define FB_SHIFT_HIGH(p, val, bits) (fb_be_math(p) ? (val) >> (bits) : \ (val) << (bits)) @@ -681,6 +682,13 @@ static inline bool fb_be_math(struct fb_ #endif /* CONFIG_FB_FOREIGN_ENDIAN */ } +static inline u32 fb_cmap_byteswap(struct fb_info *info, u32 value) +{ + u32 bpp = info->var.bits_per_pixel; + + return cpu_to_be32(value) >> FB_RIGHT_POS(info, bpp); +} + /* drivers/video/fbsysfs.c */ extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] offb: Fix little-endian support
Hi Iwai-san, On 05/14/2014 03:21 PM, Takashi Iwai wrote: Although the color palette was corrected for little endian by the commit [e1edf18b: offb: Add palette hack for little endian], the graphics mode is still shown in psychedelic colors. Are you referring to the linux logo colors ? If so, could you please try the patch below, it should be a fix. For fixing this properly, we rather need to correct the RGB offsets depending on endianness. Since the RGB base offsets are corrected, we don't need the hack for palette color entries. This patch reverts that, too. Are you testing using qemu -vga std -vnc :x ? If so, did you try changing the depth to 8,15,16,32 ? I think the patch might be breaking big endian too. Now, I am far from being an expert on frame buffers. I would be glad to have some insights on that topic. Thanks, C. [PATCH] fb: fix logo palette entries for little endian The offb_cmap_byteswap() routine helps byteswapping the color map entries when required. This patch externalizes and renames the helper routine to adjust the pseudo palette of the logo when running on little endian. 
Signed-off-by: Cédric Le Goater c...@fr.ibm.com --- drivers/video/fbmem.c |6 -- drivers/video/offb.c | 11 +-- include/linux/fb.h|8 3 files changed, 13 insertions(+), 12 deletions(-) Index: linux.git/drivers/video/fbmem.c === --- linux.git.orig/drivers/video/fbmem.c +++ linux.git/drivers/video/fbmem.c @@ -252,7 +252,8 @@ static void fb_set_logo_truepalette(str blueshift = info-var.blue.offset - (8 - info-var.blue.length); for ( i = 0; i logo-clutsize; i++) { - palette[i+32] = (safe_shift((clut[0] redmask), redshift) | + palette[i+32] = fb_cmap_byteswap(info, +safe_shift((clut[0] redmask), redshift) | safe_shift((clut[1] greenmask), greenshift) | safe_shift((clut[2] bluemask), blueshift)); clut += 3; @@ -271,7 +272,8 @@ static void fb_set_logo_directpalette(st blueshift = info-var.blue.offset; for (i = 32; i 32 + logo-clutsize; i++) - palette[i] = i redshift | i greenshift | i blueshift; + palette[i] = fb_cmap_byteswap(info, i redshift | + i greenshift | i blueshift); } static void fb_set_logo(struct fb_info *info, Index: linux.git/drivers/video/offb.c === --- linux.git.orig/drivers/video/offb.c +++ linux.git/drivers/video/offb.c @@ -91,15 +91,6 @@ extern boot_infos_t *boot_infos; #define AVIVO_DC_LUTB_WHITE_OFFSET_GREEN0x6cd4 #define AVIVO_DC_LUTB_WHITE_OFFSET_RED 0x6cd8 -#define FB_RIGHT_POS(p, bpp) (fb_be_math(p) ? 0 : (32 - (bpp))) - -static inline u32 offb_cmap_byteswap(struct fb_info *info, u32 value) -{ - u32 bpp = info-var.bits_per_pixel; - - return cpu_to_be32(value) FB_RIGHT_POS(info, bpp); -} - /* * Set a single color register. 
The values supplied are already * rounded down to the hardware's capabilities (according to the @@ -129,7 +120,7 @@ static int offb_setcolreg(u_int regno, u mask = info-var.transp.offset; value |= mask; } - pal[regno] = offb_cmap_byteswap(info, value); + pal[regno] = fb_cmap_byteswap(info, value); return 0; } Index: linux.git/include/linux/fb.h === --- linux.git.orig/include/linux/fb.h +++ linux.git/include/linux/fb.h @@ -582,6 +582,7 @@ static inline struct apertures_struct *a #endif +#define FB_RIGHT_POS(p, bpp) (fb_be_math(p) ? 0 : (32 - (bpp))) #define FB_LEFT_POS(p, bpp) (fb_be_math(p) ? (32 - (bpp)) : 0) #define FB_SHIFT_HIGH(p, val, bits) (fb_be_math(p) ? (val) (bits) : \ (val) (bits)) @@ -681,6 +682,13 @@ static inline bool fb_be_math(struct fb_ #endif /* CONFIG_FB_FOREIGN_ENDIAN */ } +static inline u32 fb_cmap_byteswap(struct fb_info *info, u32 value) +{ + u32 bpp = info-var.bits_per_pixel; + + return cpu_to_be32(value) FB_RIGHT_POS(info, bpp); +} + /* drivers/video/fbsysfs.c */ extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] offb: Fix little-endian support
On 05/14/2014 04:24 PM, Takashi Iwai wrote: At Wed, 14 May 2014 16:01:17 +0200, Cedric Le Goater wrote: Hi Iwai-san, On 05/14/2014 03:21 PM, Takashi Iwai wrote: Although the color palette was corrected for little endian by the commit [e1edf18b: offb: Add palette hack for little endian], the graphics mode is still shown in psychedelic colors. Are you referring to the linux logo colors ? If so, could you please try the patch below, it should be a fix. Not only penguin logo but the whole X graphics got strange colors, too, according to the bug report. I put the original reporter/tester (Dinar Valeev) to Cc. I'm merely a person who tries to fix this mess ;) BTW, did you try to run X on fbdev? Not at the time, I was working on the console only, BE and LE. I just tried fbdev and indeed this is a psychedelic mess :) Your fix also has issues on BE and console and the patch of mine below is of no use for fbdev. Damn, this is a nightmare. C. For fixing this properly, we rather need to correct the RGB offsets depending on endianness. Since the RGB base offsets are corrected, we don't need the hack for palette color entries. This patch reverts that, too. Are you testing using qemu -vga std -vnc :x ? If so, did you try changing the depth to 8,15,16,32 ? Yes, it was with qemu -vga std -vnc :x. About different color depths, Dinar can test / clarify better, I suppose. I think the patch might be breaking big endian too. Big endian should work as is because my patch uses the original offsets when fb_be_math() is true. It corrects the RGB offsets if !fb_be_math(). But, I'm also not entirely sure whether this is 100% correct, either. Namely, if the RGB offsets were correct for some little endian machines with offb, my patch would break it, of course. But, then your previous fix must have already broken it as well, so I took the same fb_be_math() check. thanks, Takashi Now, I am far from being an expert on frame buffers. I would be glad to have some insights on that topic. Thanks, C. 
[PATCH] fb: fix logo palette entries for little endian The offb_cmap_byteswap() routine helps byteswapping the color map entries when required. This patch externalizes and renames the helper routine to adjust the pseudo palette of the logo when running on little endian. Signed-off-by: Cédric Le Goater c...@fr.ibm.com --- drivers/video/fbmem.c |6 -- drivers/video/offb.c | 11 +-- include/linux/fb.h|8 3 files changed, 13 insertions(+), 12 deletions(-) Index: linux.git/drivers/video/fbmem.c === --- linux.git.orig/drivers/video/fbmem.c +++ linux.git/drivers/video/fbmem.c @@ -252,7 +252,8 @@ static void fb_set_logo_truepalette(str blueshift = info-var.blue.offset - (8 - info-var.blue.length); for ( i = 0; i logo-clutsize; i++) { -palette[i+32] = (safe_shift((clut[0] redmask), redshift) | +palette[i+32] = fb_cmap_byteswap(info, + safe_shift((clut[0] redmask), redshift) | safe_shift((clut[1] greenmask), greenshift) | safe_shift((clut[2] bluemask), blueshift)); clut += 3; @@ -271,7 +272,8 @@ static void fb_set_logo_directpalette(st blueshift = info-var.blue.offset; for (i = 32; i 32 + logo-clutsize; i++) -palette[i] = i redshift | i greenshift | i blueshift; +palette[i] = fb_cmap_byteswap(info, i redshift | +i greenshift | i blueshift); } static void fb_set_logo(struct fb_info *info, Index: linux.git/drivers/video/offb.c === --- linux.git.orig/drivers/video/offb.c +++ linux.git/drivers/video/offb.c @@ -91,15 +91,6 @@ extern boot_infos_t *boot_infos; #define AVIVO_DC_LUTB_WHITE_OFFSET_GREEN0x6cd4 #define AVIVO_DC_LUTB_WHITE_OFFSET_RED 0x6cd8 -#define FB_RIGHT_POS(p, bpp) (fb_be_math(p) ? 0 : (32 - (bpp))) - -static inline u32 offb_cmap_byteswap(struct fb_info *info, u32 value) -{ -u32 bpp = info-var.bits_per_pixel; - -return cpu_to_be32(value) FB_RIGHT_POS(info, bpp); -} - /* * Set a single color register. 
The values supplied are already * rounded down to the hardware's capabilities (according to the @@ -129,7 +120,7 @@ static int offb_setcolreg(u_int regno, u mask = info-var.transp.offset; value |= mask; } -pal[regno] = offb_cmap_byteswap(info, value); +pal[regno] = fb_cmap_byteswap(info, value); return 0; } Index: linux.git/include/linux/fb.h === --- linux.git.orig/include/linux/fb.h
Re: [PATCH 2/2] extend clone_flags using parent_tidptr argument
Andrew Morton wrote: On Mon, 4 Feb 2008 14:24:16 -0600 "Serge E. Hallyn" <[EMAIL PROTECTED]> wrote: Quoting Cedric Le Goater ([EMAIL PROTECTED]): From: Cedric Le Goater <[EMAIL PROTECTED]> We have at least 2 patchsets requiring each a new clone flag and there it is, we've reached the limit, none are left. This patch uses the CLONE_DETACHED flag (unused) as a marker to extend the Are we pretty sure that there is no legacy software out there which has continued to specify CLONE_DETACHED since the kernel ignores it? Please see -mm's clone-prepare-to-recycle-clone_detached-and-clone_stopped.patch That patch has been cut back to only recycle CLONE_STOPPED because there indeed was software out there which is setting CLONE_DETACHED. See http://linux.derkeiler.com/Mailing-Lists/Kernel/2007-11/msg04293.html bummer, I used the wrong one :/ Thanks, C. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] extend clone_flags using parent_tidptr argument
Andrew Morton wrote: On Mon, 4 Feb 2008 14:24:16 -0600 Serge E. Hallyn [EMAIL PROTECTED] wrote: Quoting Cedric Le Goater ([EMAIL PROTECTED]): From: Cedric Le Goater [EMAIL PROTECTED] We have at least 2 patchsets requiring each a new clone flag and there it is, we've reached the limit, none are left. This patch uses the CLONE_DETACHED flag (unused) as a marker to extend the Are we pretty sure that there is no legacy software out there which has continued to specify CLONE_DETACHED since the kernel ignores it? Please see -mm's clone-prepare-to-recycle-clone_detached-and-clone_stopped.patch That patch has been cut back to only recycle CLONE_STOPPED because there indeed was software out there which is setting CLONE_DETACHED. See http://linux.derkeiler.com/Mailing-Lists/Kernel/2007-11/msg04293.html bummer, I used the wrong one :/ Thanks, C. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] extend clone_flags using parent_tidptr argument
From: Cedric Le Goater <[EMAIL PROTECTED]> We have at least 2 patchsets requiring each a new clone flag and there it is, we've reached the limit, none are left. This patch uses the CLONE_DETACHED flag (unused) as a marker to extend the clone flags through the parent_tidptr argument. Initially, we thought on using the last bit but it has recently been taken by CLONE_IO. Obviously, this hack doesn't work for unshare() for which I don't see any other solution than to add a new syscall : long sys_unshare64(unsigned long clone_flags_high, unsigned long clone_flags_low); Is this the right path to extend the clone flags ? should we add a clone64() rather than hack the extending clone() ? Thanks for any comments ! C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- include/linux/sched.h |1 + kernel/fork.c | 14 +- 2 files changed, 14 insertions(+), 1 deletion(-) Index: 2.6.24-mm1/include/linux/sched.h === --- 2.6.24-mm1.orig/include/linux/sched.h +++ 2.6.24-mm1/include/linux/sched.h @@ -28,6 +28,7 @@ #define CLONE_NEWPID0x2000 /* New pid namespace */ #define CLONE_NEWNET0x4000 /* New network namespace */ #define CLONE_IO0x8000 /* Clone io context */ +#define CLONE_EXTFLAGS CLONE_DETACHED /* use parent_tidptr as an extended set of flags */ /* * Scheduling policies Index: 2.6.24-mm1/kernel/fork.c === --- 2.6.24-mm1.orig/kernel/fork.c +++ 2.6.24-mm1/kernel/fork.c @@ -1012,6 +1012,14 @@ static struct task_struct *copy_process( struct task_struct *p; int cgroup_callbacks_done = 0; + /* +* It is not permitted to specify both CLONE_EXTFLAGS and +* CLONE_PARENT_SETTID +*/ + if ((clone_flags & (CLONE_EXTFLAGS|CLONE_PARENT_SETTID)) == + (CLONE_EXTFLAGS|CLONE_PARENT_SETTID)) + return ERR_PTR(-EINVAL); + if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -1455,6 +1463,7 @@ long do_fork(unsigned long clone_flags, struct task_struct *p; int trace = 0; long nr; + u64 clone_flags64 = clone_flags; /* * We hope to recycle these flags 
after 2.6.26 @@ -1479,7 +1488,10 @@ long do_fork(unsigned long clone_flags, clone_flags |= CLONE_PTRACE; } - p = copy_process(clone_flags, stack_start, regs, stack_size, + if (clone_flags & CLONE_EXTFLAGS) + clone_flags64 = ((u64) (uintptr_t) parent_tidptr << 32) | clone_flags; + + p = copy_process(clone_flags64, stack_start, regs, stack_size, child_tidptr, NULL); /* * Do this prior waking up the new thread - the thread pointer -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] change clone_flags type to u64
From: Cedric Le Goater <[EMAIL PROTECTED]> This is a preliminary patch changing the clone_flags type to 64bits for all the routines called by do_fork(). It prepares ground for the next patch which introduces an enhanced version of clone(). This is work in progress. All conversions might not be done yet. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- arch/alpha/kernel/process.c |2 +- arch/arm/kernel/process.c |2 +- arch/avr32/kernel/process.c |2 +- arch/blackfin/kernel/process.c |2 +- arch/cris/arch-v10/kernel/process.c |2 +- arch/cris/arch-v32/kernel/process.c |2 +- arch/frv/kernel/process.c |2 +- arch/h8300/kernel/process.c |2 +- arch/ia64/ia32/sys_ia32.c |2 +- arch/ia64/kernel/process.c |2 +- arch/m32r/kernel/process.c |2 +- arch/m68k/kernel/process.c |2 +- arch/m68knommu/kernel/process.c |2 +- arch/mips/kernel/process.c |2 +- arch/mn10300/kernel/process.c |2 +- arch/parisc/kernel/process.c|2 +- arch/powerpc/kernel/process.c |2 +- arch/s390/kernel/process.c |2 +- arch/sh/kernel/process_32.c |2 +- arch/sh/kernel/process_64.c |2 +- arch/sparc/kernel/process.c |2 +- arch/sparc64/kernel/process.c |2 +- arch/um/kernel/process.c|2 +- arch/v850/kernel/process.c |2 +- arch/x86/kernel/process_32.c|2 +- arch/x86/kernel/process_64.c|2 +- arch/xtensa/kernel/process.c|2 +- fs/namespace.c |2 +- include/linux/ipc_namespace.h |4 ++-- include/linux/key.h |2 +- include/linux/mnt_namespace.h |2 +- include/linux/nsproxy.h |2 +- include/linux/pid_namespace.h |4 ++-- include/linux/sched.h |2 +- include/linux/security.h|6 +++--- include/linux/sem.h |4 ++-- include/linux/user_namespace.h |4 ++-- include/linux/utsname.h |4 ++-- include/net/net_namespace.h |4 ++-- ipc/namespace.c |2 +- ipc/sem.c |2 +- kernel/fork.c | 14 +++--- kernel/nsproxy.c|4 ++-- kernel/pid_namespace.c |2 +- kernel/user_namespace.c |2 +- kernel/utsname.c|2 +- net/core/net_namespace.c|4 ++-- security/dummy.c|2 +- security/keys/process_keys.c|2 +- security/security.c |2 +- security/selinux/hooks.c|2 +- 51 files 
changed, 67 insertions(+), 67 deletions(-) Index: 2.6.24-mm1/arch/alpha/kernel/process.c === --- 2.6.24-mm1.orig/arch/alpha/kernel/process.c +++ 2.6.24-mm1/arch/alpha/kernel/process.c @@ -270,7 +270,7 @@ alpha_vfork(struct pt_regs *regs) */ int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(int nr, u64 clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { Index: 2.6.24-mm1/arch/arm/kernel/process.c === --- 2.6.24-mm1.orig/arch/arm/kernel/process.c +++ 2.6.24-mm1/arch/arm/kernel/process.c @@ -331,7 +331,7 @@ void release_thread(struct task_struct * asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int -copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, +copy_thread(int nr, u64 clone_flags, unsigned long stack_start, unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs) { struct thread_info *thread = task_thread_info(p); Index: 2.6.24-mm1/arch/avr32/kernel/process.c === --- 2.6.24-mm1.orig/arch/avr32/kernel/process.c +++ 2.6.24-mm1/arch/avr32/kernel/process.c @@ -325,7 +325,7 @@ int dump_fpu(struct pt_regs *regs, elf_f asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(int nr, u64 clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { Index: 2.6.24-mm1/arch/blackfin/kernel/process.c === --- 2.6.24-mm1.orig/arch/blackfin/kernel/process.c +++ 2.6.24-mm1/arch/blackfin/kernel/process.c @@ -168,7 +168,7 @@ asmlinkage int bfin_clone(struct pt_regs } int -copy_thread(int nr, unsigned long clone_flags, +copy_thread(int nr, u64 clone_flags, unsigned long usp, unsigned long topstk, struct task_struct *p, struct pt_regs *regs) { Index: 2.6.24-mm1/arch/cris/arc
[PATCH 1/2] change clone_flags type to u64
From: Cedric Le Goater [EMAIL PROTECTED] This is a preliminary patch changing the clone_flags type to 64bits for all the routines called by do_fork(). It prepares ground for the next patch which introduces an enhanced version of clone(). This is work in progress. All conversions might not be done yet. Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- arch/alpha/kernel/process.c |2 +- arch/arm/kernel/process.c |2 +- arch/avr32/kernel/process.c |2 +- arch/blackfin/kernel/process.c |2 +- arch/cris/arch-v10/kernel/process.c |2 +- arch/cris/arch-v32/kernel/process.c |2 +- arch/frv/kernel/process.c |2 +- arch/h8300/kernel/process.c |2 +- arch/ia64/ia32/sys_ia32.c |2 +- arch/ia64/kernel/process.c |2 +- arch/m32r/kernel/process.c |2 +- arch/m68k/kernel/process.c |2 +- arch/m68knommu/kernel/process.c |2 +- arch/mips/kernel/process.c |2 +- arch/mn10300/kernel/process.c |2 +- arch/parisc/kernel/process.c|2 +- arch/powerpc/kernel/process.c |2 +- arch/s390/kernel/process.c |2 +- arch/sh/kernel/process_32.c |2 +- arch/sh/kernel/process_64.c |2 +- arch/sparc/kernel/process.c |2 +- arch/sparc64/kernel/process.c |2 +- arch/um/kernel/process.c|2 +- arch/v850/kernel/process.c |2 +- arch/x86/kernel/process_32.c|2 +- arch/x86/kernel/process_64.c|2 +- arch/xtensa/kernel/process.c|2 +- fs/namespace.c |2 +- include/linux/ipc_namespace.h |4 ++-- include/linux/key.h |2 +- include/linux/mnt_namespace.h |2 +- include/linux/nsproxy.h |2 +- include/linux/pid_namespace.h |4 ++-- include/linux/sched.h |2 +- include/linux/security.h|6 +++--- include/linux/sem.h |4 ++-- include/linux/user_namespace.h |4 ++-- include/linux/utsname.h |4 ++-- include/net/net_namespace.h |4 ++-- ipc/namespace.c |2 +- ipc/sem.c |2 +- kernel/fork.c | 14 +++--- kernel/nsproxy.c|4 ++-- kernel/pid_namespace.c |2 +- kernel/user_namespace.c |2 +- kernel/utsname.c|2 +- net/core/net_namespace.c|4 ++-- security/dummy.c|2 +- security/keys/process_keys.c|2 +- security/security.c |2 +- security/selinux/hooks.c|2 +- 51 files 
changed, 67 insertions(+), 67 deletions(-) Index: 2.6.24-mm1/arch/alpha/kernel/process.c === --- 2.6.24-mm1.orig/arch/alpha/kernel/process.c +++ 2.6.24-mm1/arch/alpha/kernel/process.c @@ -270,7 +270,7 @@ alpha_vfork(struct pt_regs *regs) */ int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(int nr, u64 clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { Index: 2.6.24-mm1/arch/arm/kernel/process.c === --- 2.6.24-mm1.orig/arch/arm/kernel/process.c +++ 2.6.24-mm1/arch/arm/kernel/process.c @@ -331,7 +331,7 @@ void release_thread(struct task_struct * asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int -copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, +copy_thread(int nr, u64 clone_flags, unsigned long stack_start, unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs) { struct thread_info *thread = task_thread_info(p); Index: 2.6.24-mm1/arch/avr32/kernel/process.c === --- 2.6.24-mm1.orig/arch/avr32/kernel/process.c +++ 2.6.24-mm1/arch/avr32/kernel/process.c @@ -325,7 +325,7 @@ int dump_fpu(struct pt_regs *regs, elf_f asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(int nr, u64 clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { Index: 2.6.24-mm1/arch/blackfin/kernel/process.c === --- 2.6.24-mm1.orig/arch/blackfin/kernel/process.c +++ 2.6.24-mm1/arch/blackfin/kernel/process.c @@ -168,7 +168,7 @@ asmlinkage int bfin_clone(struct pt_regs } int -copy_thread(int nr, unsigned long clone_flags, +copy_thread(int nr, u64 clone_flags, unsigned long usp, unsigned long topstk, struct task_struct *p, struct pt_regs *regs) { Index: 2.6.24-mm1/arch/cris/arch-v10/kernel/process.c
[PATCH 2/2] extend clone_flags using parent_tidptr argument
From: Cedric Le Goater [EMAIL PROTECTED] We have at least 2 patchsets each requiring a new clone flag and there it is, we've reached the limit, none are left. This patch uses the CLONE_DETACHED flag (unused) as a marker to extend the clone flags through the parent_tidptr argument. Initially, we thought of using the last bit but it has recently been taken by CLONE_IO. Obviously, this hack doesn't work for unshare() for which I don't see any other solution than to add a new syscall : long sys_unshare64(unsigned long clone_flags_high, unsigned long clone_flags_low); Is this the right path to extend the clone flags ? should we add a clone64() rather than hack the extending clone() ? Thanks for any comments ! C. Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- include/linux/sched.h |1 + kernel/fork.c | 14 +- 2 files changed, 14 insertions(+), 1 deletion(-) Index: 2.6.24-mm1/include/linux/sched.h === --- 2.6.24-mm1.orig/include/linux/sched.h +++ 2.6.24-mm1/include/linux/sched.h @@ -28,6 +28,7 @@ #define CLONE_NEWPID 0x20000000 /* New pid namespace */ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ +#define CLONE_EXTFLAGS CLONE_DETACHED /* use parent_tidptr as an extended set of flags */ /* * Scheduling policies Index: 2.6.24-mm1/kernel/fork.c === --- 2.6.24-mm1.orig/kernel/fork.c +++ 2.6.24-mm1/kernel/fork.c @@ -1012,6 +1012,14 @@ static struct task_struct *copy_process( struct task_struct *p; int cgroup_callbacks_done = 0; + /* +* It is not permitted to specify both CLONE_EXTFLAGS and +* CLONE_PARENT_SETTID +*/ + if ((clone_flags & (CLONE_EXTFLAGS|CLONE_PARENT_SETTID)) == + (CLONE_EXTFLAGS|CLONE_PARENT_SETTID)) + return ERR_PTR(-EINVAL); + if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -1455,6 +1463,7 @@ long do_fork(unsigned long clone_flags, struct task_struct *p; int trace = 0; long nr; + u64 clone_flags64 = clone_flags; /* * We hope to recycle these flags after 2.6.26 
@@ -1479,7 +1488,10 @@ long do_fork(unsigned long clone_flags, clone_flags |= CLONE_PTRACE; } - p = copy_process(clone_flags, stack_start, regs, stack_size, + if (clone_flags & CLONE_EXTFLAGS) + clone_flags64 = ((u64) (uintptr_t) parent_tidptr << 32) | clone_flags; + + p = copy_process(clone_flags64, stack_start, regs, stack_size, child_tidptr, NULL); /* * Do this prior waking up the new thread - the thread pointer -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2.6.24-rc8-mm1 09/15] (RFC) IPC: new kernel API to change an ID
Hello Kirill ! Kirill Korotaev wrote: Pierre, my point is that after you've added interface "set IPCID", you'll need more and more for checkpointing: - "create/setup conntrack" (otherwise connections get dropped), - "set task start time" (needed for Oracle checkpointing BTW), - "set some statistics counters (e.g. networking or taskstats)" - "restore inotify" and so on and so forth. right. we know that we will have to handle a lot of these and more and we will need an API for it :) so how should we handle it ? through a dedicated syscall that would be able to checkpoint and/or restart a process, an ipc object, an ipc namespace, a full container ? will it take a fd or a big binary blob ? I personally really liked Pavel's idea of a filesystem. but we dropped the thread. that's for the user API but we will need also kernel services to expose (checkpoint) states and restore them. If it's too early to talk about the user API, we could try first to refactor the kernel internals to expose correctly what we need. That's what Pierre's patchset is trying to do. Cheers, C. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2.6.24-rc8-mm1 09/15] (RFC) IPC: new kernel API to change an ID
Hello Kirill ! Kirill Korotaev wrote: Pierre, my point is that after you've added interface set IPCID, you'll need more and more for checkpointing: - create/setup conntrack (otherwise connections get dropped), - set task start time (needed for Oracle checkpointing BTW), - set some statistics counters (e.g. networking or taskstats) - restore inotify and so on and so forth. right. we know that we will have to handle a lot of these and more and we will need an API for it :) so how should we handle it ? through a dedicated syscall that would be able to checkpoint and/or restart a process, an ipc object, an ipc namespace, a full container ? will it take a fd or a big binary blob ? I personally really liked Pavel's idea of a filesystem. but we dropped the thread. that's for the user API but we will need also kernel services to expose (checkpoint) states and restore them. If it's too early to talk about the user API, we could try first to refactor the kernel internals to expose correctly what we need. That's what Pierre's patchset is trying to do. Cheers, C. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] Extend sys_clone and sys_unshare system calls API
Pavel Machek wrote: > On Wed 2008-01-16 15:58:55, Pavel Emelyanov wrote: >> There's only one bit in the clone_flags left, so we won't be able >> to create more namespaces after we make it busy. Besides, for >> checkpoint/restart jobs we might want to create tasks with given >> pids (virtual of course). And nobody knows for sure what else might >> be required from clone() in the future. >> >> This is an attempt to create a extendable API for clone and unshare. >> Actually this patch is a request for comment about the overall >> design. If it will turn out to "look good", then we'll select some >> better names for new flag and data types. >> >> I use the last bit in the clone_flags for CLONE_LONGARG. When set it >> will denote that the child_tidptr is not a pointer to a tid storage, >> but the pointer to the struct long_clone_struct which currently >> looks like this: >> >> struct long_clone_arg { >> int size; >> }; > > Ugly as night, I'd say. (Al said it better). What about just adding > clone2 syscall, that takes u64? yes but we would need more something like : long sys_clone64(unsigned long flags_high, unsigned long flag_low) if we want the syscall to be supported on 32bit arch. clone2 is also being used on ia64 already. C. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] Extend sys_clone and sys_unshare system calls API
Pavel Machek wrote: On Wed 2008-01-16 15:58:55, Pavel Emelyanov wrote: There's only one bit in the clone_flags left, so we won't be able to create more namespaces after we make it busy. Besides, for checkpoint/restart jobs we might want to create tasks with given pids (virtual of course). And nobody knows for sure what else might be required from clone() in the future. This is an attempt to create a extendable API for clone and unshare. Actually this patch is a request for comment about the overall design. If it will turn out to look good, then we'll select some better names for new flag and data types. I use the last bit in the clone_flags for CLONE_LONGARG. When set it will denote that the child_tidptr is not a pointer to a tid storage, but the pointer to the struct long_clone_struct which currently looks like this: struct long_clone_arg { int size; }; Ugly as night, I'd say. (Al said it better). What about just adding clone2 syscall, that takes u64? yes but we would need more something like : long sys_clone64(unsigned long flags_high, unsigned long flag_low) if we want the syscall to be supported on 32bit arch. clone2 is also being used on ia64 already. C. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] Extend sys_clone and sys_unshare system calls API
Al Viro wrote: > On Wed, Jan 16, 2008 at 07:23:40AM -0700, Jonathan Corbet wrote: >> Hi, Pavel, >> >> [Adding Ulrich] >> >>> I use the last bit in the clone_flags for CLONE_LONGARG. When set it >>> will denote that the child_tidptr is not a pointer to a tid storage, >>> but the pointer to the struct long_clone_struct which currently >>> looks like this: >> I'm probably just totally off the deep end, but something did occur to >> me: this looks an awful lot like a special version of the sys_indirect() >> idea. Unless it has been somehow decided that sys_indirect() is the >> wrong idea, might it not be better to look at making that interface >> solve the extended clone() problem as well? > > Nah, just put an XML parser into the kernel to have the form match the > contents... > > Al "perhaps we should newgroup alt.tasteless.api for all that stuff" Viro so you'd rather have new syscalls to support new clone flags ? something like : long sys_clone64(unsigned long flags_high, unsigned long flag_low) long sys_unshare64(unsigned long flags_high, unsigned long flag_low) Thanks, C. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1 - BUG in tcp_fragment
Ilpo Järvinen wrote: > On Thu, 13 Dec 2007, Cedric Le Goater wrote: > >> I got this one while compiling on NFS. >> >> C. >> >> kernel BUG at /home/legoater/linux/2.6.24-rc4-mm1/include/net/tcp.h:1480! > > I'm not exactly sure what patches you have applied and which patches are > not, with rc4-mm1 there are two patches (first one was incomplete, I > assume you had at least that one based on your other mail) to really fix > the issues in (__|)tcp_reset_fack_counts(...). Yes I only have the first patch you sent on lkml on top of 2.6.24-rc4-mm1. attached below. I didn't see the second one on lkml ? > However, there seems to be so much breakage that I have a bit trouble to > decide where to start... The situation seems bit scary :-). my n/w environment seems to reproduce these issues quite easily. if you need some testing, just ping me. Cheers, C. > So, I might soon prepare a revert patch for most of the questionable > TCP parts and ask Dave to apply it (and drop them fully during next > rebase) unless I suddently figure something out soon which explains > all/most of the problems, then return to drawing board. ...As it seems > that the cumulative ACK processing problem discovered later on (having > rather cumbersome solution with skbs only) will make part of the work > that's currently in net-2.6.25 quite useless/duplicate effort. But thanks > anyway for reporting these. > > Subject: [PATCH] [TCP]: Fix fack_count miscountings (multiple places) 1) Fack_count is set incorrectly if the highest sent skb is already sacked (the skb->prev won't return it because it's on the other list already). These manifest as fackets_out counting error later on, the second-order effects are very hard to track, so it may fix all out-standing TCP bug reports. 
2) Prev == NULL check was wrong way around 3) Last skb's fack count was incorrectly skipped while() {} loop Signed-off-by: Ilpo Järvinen <[EMAIL PROTECTED]> --- include/net/tcp.h | 22 -- 1 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9dbed0b..11a7e3e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1337,10 +1337,20 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk) static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { struct sk_buff *prev = tcp_write_queue_prev(sk, skb); + unsigned int fc = 0; + + if (prev == (struct sk_buff *)&sk->sk_write_queue) + prev = NULL; + else if (!tcp_skb_adjacent(sk, prev, skb)) + prev = NULL; - if (prev != (struct sk_buff *)&sk->sk_write_queue) - TCP_SKB_CB(skb)->fack_count = TCP_SKB_CB(prev)->fack_count + - tcp_skb_pcount(prev); + if ((prev == NULL) && !__tcp_write_queue_empty(sk, TCP_WQ_SACKED)) + prev = __tcp_write_queue_tail(sk, TCP_WQ_SACKED); + + if (prev != NULL) + fc = TCP_SKB_CB(prev)->fack_count + tcp_skb_pcount(prev); + + TCP_SKB_CB(skb)->fack_count = fc; sk->sk_send_head = tcp_write_queue_next(sk, skb); if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) @@ -1464,7 +1474,7 @@ static inline struct sk_buff *__tcp_reset_fack_counts(struct sock *sk, { unsigned int fc = 0; - if (prev == NULL) + if (prev != NULL) fc = TCP_SKB_CB(*prev)->fack_count + tcp_skb_pcount(*prev); BUG_ON((*prev != NULL) && !tcp_skb_adjacent(sk, *prev, skb)); @@ -1521,7 +1531,7 @@ static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *inskb) skb[otherq] = prev->next; } - while (skb[queue] != __tcp_write_queue_tail(sk, queue)) { + do { /* Lazy find for the other queue */ if (skb[queue] == NULL) { skb[queue] = tcp_write_queue_find(sk, TCP_SKB_CB(prev)->seq, @@ -1535,7 +1545,7 @@ static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *inskb) break; queue ^= TCP_WQ_SACKED; - } + } while (skb[queue] != 
__tcp_write_queue_tail(sk, queue)); } static inline void __tcp_insert_write_queue_after(struct sk_buff *skb, -- 1.5.0.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1 - BUG in tcp_fragment
Andrew Morton wrote: > Temporarily at > > http://userweb.kernel.org/~akpm/2.6.24-rc4-mm1/ > > Will appear later at > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc4/2.6.24-rc4-mm1/ I got this one while compiling on NFS. C. kernel BUG at /home/legoater/linux/2.6.24-rc4-mm1/include/net/tcp.h:1480! invalid opcode: [1] SMP last sysfs file: /sys/devices/pci:00/:00:1e.0/:01:01.0/local_cpus CPU 1 Modules linked in: autofs4 nfs lockd sunrpc tg3 sg joydev ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #3 RIP: 0010:[] [] tcp_fragment+0x5ee/0x6f7 RSP: 0018:810147c9f9e0 EFLAGS: 00010217 RAX: 1526c311 RBX: 8100c2ce1d00 RCX: 810143cc6aa0 RDX: 0001 RSI: 810102b37b00 RDI: 810102b37b50 RBP: 810147c9fa50 R08: 004a R09: 0001 R10: 0b50 R11: 0001 R12: 81013a575700 R13: R14: 810143cc6400 R15: 81013a575750 FS: () GS:810147c57140() knlGS: CS: 0010 DS: 0018 ES: 0018 CR0: 8005003b CR2: 2ad5d294b000 CR3: bd11b000 CR4: 06e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process swapper (pid: 0, threadinfo 810147c98000, task 810147c89040) Stack: 810147c9fa00 05a843cc6400 810143cc6400 810147c9fa70 8100c2ce1d50 810143cc6590 810143cc6aa0 15265421 810143cc6400 810143cc6400 81013a575700 Call Trace: [] tcp_retransmit_skb+0xd6/0x713 [] tcp_xmit_retransmit_queue+0xd0/0x330 [] tcp_fastretrans_alert+0xb92/0xbf2 [] tcp_ack+0xdf3/0xfbe [] tcp_rcv_established+0x66a/0x76d [] tcp_v4_do_rcv+0x37/0x3aa [] tcp_v4_rcv+0x9a9/0xa76 [] ip_local_deliver_finish+0x161/0x23c [] ip_local_deliver+0x72/0x77 [] ip_rcv_finish+0x371/0x3b5 [] ip_rcv+0x292/0x2c6 [] netif_receive_skb+0x267/0x340 [] :tg3:tg3_poll+0x5d2/0x89e [] net_rx_action+0xd5/0x1ad [] __do_softirq+0x5f/0xe3 [] call_softirq+0x1c/0x28 [] do_softirq+0x39/0x9f [] irq_exit+0x4e/0x50 [] do_IRQ+0xb7/0xd7 [] mwait_idle+0x0/0x55 [] ret_from_intr+0x0/0xf [] __atomic_notifier_call_chain+0x20/0x83 [] mwait_idle+0x48/0x55 [] enter_idle+0x22/0x24 [] cpu_idle+0xa1/0xc5 [] start_secondary+0x3b9/0x3c5 Code: 0f 0b eb 
fe 48 85 f6 74 08 8b 46 6c 3b 41 68 75 55 48 8d 41 RIP [] tcp_fragment+0x5ee/0x6f7 RSP Kernel panic - not syncing: Aiee, killing interrupt handler! -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
tcp_sacktag_one() WARNING (was Re: 2.6.24-rc4-mm1)
Cedric Le Goater wrote: > Ilpo Järvinen wrote: >> On Wed, 5 Dec 2007, Andrew Morton wrote: >> >>> On Thu, 06 Dec 2007 17:59:37 +1100 Reuben Farrelly <[EMAIL PROTECTED]> >>> wrote: >>> >>>> This non fatal oops which I have just noticed may be related to this >>>> change then >>>> - certainly looks networking related. >>> yep, but it isn't e1000. It's core TCP. >>> >>>> WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() >>>> Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 >>> Ilpo, Reuben's kernel is talking to you ;) >> ...Please try the patch below. Andrew, this probably fixes your problem >> (the packets <= tp->packets_out) as well. > > nah. I got the WARNINGs again with this patch. I got this new one on a 2.6.24-rc5-mm1. It looked similar ? C. WARNING: at /home/legoater/linux/2.6.24-rc5-mm1/net/ipv4/tcp_input.c:1280 tcp_sacktag_one() Pid: 0, comm: swapper Not tainted 2.6.24-rc5-mm1 #1 Call Trace: [] tcp_sacktag_walk+0x2bc/0x62a [] tcp_sacktag_write_queue+0x595/0xa7c [] kfree+0xd4/0xe0 [] tcp_ack+0x2a7/0xfc7 [] mark_held_locks+0x47/0x6a [] trace_hardirqs_on+0xfe/0x139 [] tcp_rcv_established+0x66a/0x76d [] tcp_v4_do_rcv+0x37/0x3aa [] tcp_v4_rcv+0x9a9/0xa76 [] ip_local_deliver_finish+0x161/0x23c [] ip_local_deliver+0x72/0x77 [] ip_rcv_finish+0x371/0x3b5 [] ip_rcv+0x292/0x2c6 [] netif_receive_skb+0x267/0x340 [] :tg3:tg3_poll+0x5d2/0x89e [] net_rx_action+0xd5/0x1ad [] __do_softirq+0x5f/0xe3 [] call_softirq+0x1c/0x28 [] do_softirq+0x39/0x9f [] irq_exit+0x4e/0x50 [] do_IRQ+0xb7/0xd7 [] mwait_idle+0x0/0x52 [] ret_from_intr+0x0/0xf [] __atomic_notifier_call_chain+0x20/0x83 [] mwait_idle+0x48/0x52 [] enter_idle+0x22/0x24 [] cpu_idle+0xa1/0xc5 [] start_secondary+0x3b9/0x3c5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
tcp_sacktag_one() WARNING (was Re: 2.6.24-rc4-mm1)
Cedric Le Goater wrote: Ilpo Järvinen wrote: On Wed, 5 Dec 2007, Andrew Morton wrote: On Thu, 06 Dec 2007 17:59:37 +1100 Reuben Farrelly [EMAIL PROTECTED] wrote: This non fatal oops which I have just noticed may be related to this change then - certainly looks networking related. yep, but it isn't e1000. It's core TCP. WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 Ilpo, Reuben's kernel is talking to you ;) ...Please try the patch below. Andrew, this probably fixes your problem (the packets = tp-packets_out) as well. nah. I got the WARNINGs again with this patch. I got this new one on a 2.6.24-rc5-mm1. It looked similar ? C. WARNING: at /home/legoater/linux/2.6.24-rc5-mm1/net/ipv4/tcp_input.c:1280 tcp_sacktag_one() Pid: 0, comm: swapper Not tainted 2.6.24-rc5-mm1 #1 Call Trace: IRQ [80410e0e] tcp_sacktag_walk+0x2bc/0x62a [80411711] tcp_sacktag_write_queue+0x595/0xa7c [8028ce66] kfree+0xd4/0xe0 [80411e9f] tcp_ack+0x2a7/0xfc7 [80252ca1] mark_held_locks+0x47/0x6a [80252e5c] trace_hardirqs_on+0xfe/0x139 [80415d59] tcp_rcv_established+0x66a/0x76d [8041bd35] tcp_v4_do_rcv+0x37/0x3aa [8041e623] tcp_v4_rcv+0x9a9/0xa76 [80401832] ip_local_deliver_finish+0x161/0x23c [80401d47] ip_local_deliver+0x72/0x77 [8040168d] ip_rcv_finish+0x371/0x3b5 [80401ca1] ip_rcv+0x292/0x2c6 [803e2aae] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e505c] net_rx_action+0xd5/0x1ad [8023b0b9] __do_softirq+0x5f/0xe3 [8020c8ec] call_softirq+0x1c/0x28 [8020e7b9] do_softirq+0x39/0x9f [8023b058] irq_exit+0x4e/0x50 [8020e900] do_IRQ+0xb7/0xd7 [8020a892] mwait_idle+0x0/0x52 [8020bbe6] ret_from_intr+0x0/0xf EOI [8024d0cb] __atomic_notifier_call_chain+0x20/0x83 [8020a8da] mwait_idle+0x48/0x52 [80209e79] enter_idle+0x22/0x24 [8020a822] cpu_idle+0xa1/0xc5 [8021e755] start_secondary+0x3b9/0x3c5 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More 
majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1 - BUG in tcp_fragment
Andrew Morton wrote: Temporarily at http://userweb.kernel.org/~akpm/2.6.24-rc4-mm1/ Will appear later at ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc4/2.6.24-rc4-mm1/ I got this one while compiling on NFS. C. kernel BUG at /home/legoater/linux/2.6.24-rc4-mm1/include/net/tcp.h:1480! invalid opcode: [1] SMP last sysfs file: /sys/devices/pci:00/:00:1e.0/:01:01.0/local_cpus CPU 1 Modules linked in: autofs4 nfs lockd sunrpc tg3 sg joydev ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #3 RIP: 0010:[80418d93] [80418d93] tcp_fragment+0x5ee/0x6f7 RSP: 0018:810147c9f9e0 EFLAGS: 00010217 RAX: 1526c311 RBX: 8100c2ce1d00 RCX: 810143cc6aa0 RDX: 0001 RSI: 810102b37b00 RDI: 810102b37b50 RBP: 810147c9fa50 R08: 004a R09: 0001 R10: 0b50 R11: 0001 R12: 81013a575700 R13: R14: 810143cc6400 R15: 81013a575750 FS: () GS:810147c57140() knlGS: CS: 0010 DS: 0018 ES: 0018 CR0: 8005003b CR2: 2ad5d294b000 CR3: bd11b000 CR4: 06e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process swapper (pid: 0, threadinfo 810147c98000, task 810147c89040) Stack: 810147c9fa00 05a843cc6400 810143cc6400 810147c9fa70 8100c2ce1d50 810143cc6590 810143cc6aa0 15265421 810143cc6400 810143cc6400 81013a575700 Call Trace: IRQ [804190c7] tcp_retransmit_skb+0xd6/0x713 [804197d4] tcp_xmit_retransmit_queue+0xd0/0x330 [8041209b] tcp_fastretrans_alert+0xb92/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [80417295] tcp_rcv_established+0x66a/0x76d [8041d285] tcp_v4_do_rcv+0x37/0x3aa [8041fb73] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c] call_softirq+0x1c/0x28 [8020e739] do_softirq+0x39/0x9f [8023b5a4] irq_exit+0x4e/0x50 [8020e880] do_IRQ+0xb7/0xd7 [8020a803] mwait_idle+0x0/0x55 [8020bb66] 
ret_from_intr+0x0/0xf EOI [8024d623] __atomic_notifier_call_chain+0x20/0x83 [8020a84b] mwait_idle+0x48/0x55 [80209e79] enter_idle+0x22/0x24 [8020a793] cpu_idle+0xa1/0xc5 [8021dfd5] start_secondary+0x3b9/0x3c5 Code: 0f 0b eb fe 48 85 f6 74 08 8b 46 6c 3b 41 68 75 55 48 8d 41 RIP [80418d93] tcp_fragment+0x5ee/0x6f7 RSP 810147c9f9e0 Kernel panic - not syncing: Aiee, killing interrupt handler! -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1 - BUG in tcp_fragment
Ilpo Järvinen wrote: On Thu, 13 Dec 2007, Cedric Le Goater wrote: I got this one while compiling on NFS. C. kernel BUG at /home/legoater/linux/2.6.24-rc4-mm1/include/net/tcp.h:1480! I'm not exactly sure what patches you have applied and which patches are not, with rc4-mm1 there are two patches (first one was incomplete, I assume you had at least that one based on your other mail) to really fix the issues in (__|)tcp_reset_fack_counts(...). Yes I only have the first patch you sent on lkml on top of 2.6.24-rc4-mm1. attached below. I didn't see the second one on lkml ? However, there seems to be so much breakage that I have a bit trouble to decide where to start... The situation seems bit scary :-). my n/w environment seems to reproduce these issues quite easily. if you need some testing, just ping me. Cheers, C. So, I might soon prepare a revert patch for most of the questionable TCP parts and ask Dave to apply it (and drop them fully during next rebase) unless I suddently figure something out soon which explains all/most of the problems, then return to drawing board. ...As it seems that the cumulative ACK processing problem discovered later on (having rather cumbersome solution with skbs only) will make part of the work that's currently in net-2.6.25 quite useless/duplicate effort. But thanks anyway for reporting these. Subject: [PATCH] [TCP]: Fix fack_count miscountings (multiple places) 1) Fack_count is set incorrectly if the highest sent skb is already sacked (the skb-prev won't return it because it's on the other list already). These manifest as fackets_out counting error later on, the second-order effects are very hard to track, so it may fix all out-standing TCP bug reports. 
2) Prev == NULL check was wrong way around 3) Last skb's fack count was incorrectly skipped while() {} loop Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- include/net/tcp.h | 22 -- 1 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9dbed0b..11a7e3e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1337,10 +1337,20 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk) static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { struct sk_buff *prev = tcp_write_queue_prev(sk, skb); + unsigned int fc = 0; + + if (prev == (struct sk_buff *)&sk->sk_write_queue) + prev = NULL; + else if (!tcp_skb_adjacent(sk, prev, skb)) + prev = NULL; - if (prev != (struct sk_buff *)&sk->sk_write_queue) - TCP_SKB_CB(skb)->fack_count = TCP_SKB_CB(prev)->fack_count + - tcp_skb_pcount(prev); + if ((prev == NULL) && !__tcp_write_queue_empty(sk, TCP_WQ_SACKED)) + prev = __tcp_write_queue_tail(sk, TCP_WQ_SACKED); + + if (prev != NULL) + fc = TCP_SKB_CB(prev)->fack_count + tcp_skb_pcount(prev); + + TCP_SKB_CB(skb)->fack_count = fc; sk->sk_send_head = tcp_write_queue_next(sk, skb); if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) @@ -1464,7 +1474,7 @@ static inline struct sk_buff *__tcp_reset_fack_counts(struct sock *sk, { unsigned int fc = 0; - if (prev == NULL) + if (prev != NULL) fc = TCP_SKB_CB(*prev)->fack_count + tcp_skb_pcount(*prev); BUG_ON((*prev != NULL) && !tcp_skb_adjacent(sk, *prev, skb)); @@ -1521,7 +1531,7 @@ static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *inskb) skb[otherq] = prev->next; } - while (skb[queue] != __tcp_write_queue_tail(sk, queue)) { + do { /* Lazy find for the other queue */ if (skb[queue] == NULL) { skb[queue] = tcp_write_queue_find(sk, TCP_SKB_CB(prev)->seq, @@ -1535,7 +1545,7 @@ static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *inskb) break; queue ^= TCP_WQ_SACKED; - } + } while (skb[queue] != __tcp_write_queue_tail(sk, 
queue)); } static inline void __tcp_insert_write_queue_after(struct sk_buff *skb, -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1
Ilpo Järvinen wrote: > On Wed, 5 Dec 2007, Andrew Morton wrote: > >> On Thu, 06 Dec 2007 17:59:37 +1100 Reuben Farrelly <[EMAIL PROTECTED]> wrote: >> >>> This non fatal oops which I have just noticed may be related to this change >>> then >>> - certainly looks networking related. >> yep, but it isn't e1000. It's core TCP. >> >>> WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() >>> Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 >> Ilpo, Reuben's kernel is talking to you ;) > > ...Please try the patch below. Andrew, this probably fixes your problem > (the packets <= tp->packets_out) as well. nah. I got the WARNINGs again with this patch. C. > Dave, please include this one to net-2.6.25. > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1
Ilpo Järvinen wrote: > On Wed, 5 Dec 2007, David Miller wrote: > >> From: Reuben Farrelly <[EMAIL PROTECTED]> >> Date: Thu, 06 Dec 2007 17:59:37 +1100 >> >>> On 5/12/2007 4:17 PM, Andrew Morton wrote: - Lots of device IDs have been removed from the e1000 driver and moved over to e1000e. So if your e1000 stops working, you forgot to set CONFIG_E1000E. >>> This non fatal oops which I have just noticed may be related to this change >>> then >>> - certainly looks networking related. >>> >>> WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() >>> Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 >>> >>> Call Trace: >>> [] tcp_fastretrans_alert+0x229/0xe63 >>> [] tcp_ack+0xa3f/0x127d >>> [] tcp_rcv_established+0x55f/0x7f8 >>> [] tcp_v4_do_rcv+0xdb/0x3a7 >>> [] :nf_conntrack:nf_ct_deliver_cached_events+0x75/0x99 >> No, it's from TCP assertions and changes added by Ilpo to the >> net-2.6.25 tree recently. > > Yeah, this (very likely) due to the new SACK processing (in net-2.6.25). > I'll look what could go wrong with fack_count calculations, most likely > it's the reason (I've found earlier one out-of-place retransmission > segment in one of my test case which already indicated that there's > something incorrect with them but didn't have time to debug it yet). > > Thanks for report. Some info about how easily you can reproduce & > couple of sentences about the test case might be useful later on when > evaluating the fix. I also got plenty of these when untaring a tarball on NFS. C. 
WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: [] tcp_fastretrans_alert+0xb6/0xbf2 [] tcp_ack+0xdf3/0xfbe [] sk_reset_timer+0x17/0x23 [] tcp_rcv_established+0xf3/0x76d [] tcp_v4_do_rcv+0x37/0x3aa [] tcp_v4_rcv+0x9a9/0xa76 [] ip_local_deliver_finish+0x161/0x23c [] ip_local_deliver+0x72/0x77 [] ip_rcv_finish+0x371/0x3b5 [] ip_rcv+0x292/0x2c6 [] netif_receive_skb+0x267/0x340 [] :tg3:tg3_poll+0x5d2/0x89e [] net_rx_action+0xd5/0x1ad [] __do_softirq+0x5f/0xe3 [] call_softirq+0x1c/0x28 [] do_softirq+0x39/0x9f [] irq_exit+0x4e/0x50 [] do_IRQ+0xb7/0xd7 [] mwait_idle+0x0/0x55 [] ret_from_intr+0x0/0xf [] __atomic_notifier_call_chain+0x20/0x83 [] mwait_idle+0x48/0x55 [] enter_idle+0x22/0x24 [] cpu_idle+0xa1/0xc5 [] start_secondary+0x3b9/0x3c5 WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: [] tcp_fastretrans_alert+0xb6/0xbf2 [] tcp_ack+0xdf3/0xfbe [] tcp_data_queue+0x5da/0xb0a [] tcp_rcv_established+0xf3/0x76d [] tcp_v4_do_rcv+0x37/0x3aa [] tcp_v4_rcv+0x9a9/0xa76 [] ip_local_deliver_finish+0x161/0x23c [] ip_local_deliver+0x72/0x77 [] ip_rcv_finish+0x371/0x3b5 [] ip_rcv+0x292/0x2c6 [] netif_receive_skb+0x267/0x340 [] :tg3:tg3_poll+0x5d2/0x89e [] net_rx_action+0xd5/0x1ad [] __do_softirq+0x5f/0xe3 [] call_softirq+0x1c/0x28 [] do_softirq+0x39/0x9f [] irq_exit+0x4e/0x50 [] do_IRQ+0xb7/0xd7 [] mwait_idle+0x0/0x55 [] ret_from_intr+0x0/0xf [] __atomic_notifier_call_chain+0x20/0x83 [] mwait_idle+0x48/0x55 [] enter_idle+0x22/0x24 [] cpu_idle+0xa1/0xc5 [] start_secondary+0x3b9/0x3c5 WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: [] tcp_fastretrans_alert+0xb6/0xbf2 [] tcp_ack+0xdf3/0xfbe [] tcp_data_queue+0x5da/0xb0a [] tcp_rcv_established+0xf3/0x76d 
[] tcp_v4_do_rcv+0x37/0x3aa [] tcp_v4_rcv+0x9a9/0xa76 [] ip_local_deliver_finish+0x161/0x23c [] ip_local_deliver+0x72/0x77 [] ip_rcv_finish+0x371/0x3b5 [] ip_rcv+0x292/0x2c6 [] netif_receive_skb+0x267/0x340 [] :tg3:tg3_poll+0x5d2/0x89e [] net_rx_action+0xd5/0x1ad [] __do_softirq+0x5f/0xe3 [] call_softirq+0x1c/0x28 [] do_softirq+0x39/0x9f [] irq_exit+0x4e/0x50 [] do_IRQ+0xb7/0xd7 [] mwait_idle+0x0/0x55 [] ret_from_intr+0x0/0xf [] __atomic_notifier_call_chain+0x20/0x83 [] mwait_idle+0x48/0x55 [] enter_idle+0x22/0x24 [] cpu_idle+0xa1/0xc5 [] start_secondary+0x3b9/0x3c5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc4-mm1
Ilpo Järvinen wrote: On Wed, 5 Dec 2007, David Miller wrote: From: Reuben Farrelly [EMAIL PROTECTED] Date: Thu, 06 Dec 2007 17:59:37 +1100 On 5/12/2007 4:17 PM, Andrew Morton wrote: - Lots of device IDs have been removed from the e1000 driver and moved over to e1000e. So if your e1000 stops working, you forgot to set CONFIG_E1000E. This non fatal oops which I have just noticed may be related to this change then - certainly looks networking related. WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 Call Trace: IRQ [8046e038] tcp_fastretrans_alert+0x229/0xe63 [80470975] tcp_ack+0xa3f/0x127d [804747b7] tcp_rcv_established+0x55f/0x7f8 [8047b1aa] tcp_v4_do_rcv+0xdb/0x3a7 [881148a8] :nf_conntrack:nf_ct_deliver_cached_events+0x75/0x99 No, it's from TCP assertions and changes added by Ilpo to the net-2.6.25 tree recently. Yeah, this (very likely) due to the new SACK processing (in net-2.6.25). I'll look what could go wrong with fack_count calculations, most likely it's the reason (I've found earlier one out-of-place retransmission segment in one of my test case which already indicated that there's something incorrect with them but didn't have time to debug it yet). Thanks for report. Some info about how easily you can reproduce couple of sentences about the test case might be useful later on when evaluating the fix. I also got plenty of these when untaring a tarball on NFS. C. 
WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: IRQ [804115bf] tcp_fastretrans_alert+0xb6/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [803da8fb] sk_reset_timer+0x17/0x23 [80416d1e] tcp_rcv_established+0xf3/0x76d [8041d231] tcp_v4_do_rcv+0x37/0x3aa [8041fb1f] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c] call_softirq+0x1c/0x28 [8020e739] do_softirq+0x39/0x9f [8023b5a4] irq_exit+0x4e/0x50 [8020e880] do_IRQ+0xb7/0xd7 [8020a803] mwait_idle+0x0/0x55 [8020bb66] ret_from_intr+0x0/0xf EOI [8024d623] __atomic_notifier_call_chain+0x20/0x83 [8020a84b] mwait_idle+0x48/0x55 [80209e79] enter_idle+0x22/0x24 [8020a793] cpu_idle+0xa1/0xc5 [8021dfd5] start_secondary+0x3b9/0x3c5 WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: IRQ [804115bf] tcp_fastretrans_alert+0xb6/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [804153b8] tcp_data_queue+0x5da/0xb0a [80416d1e] tcp_rcv_established+0xf3/0x76d [8041d231] tcp_v4_do_rcv+0x37/0x3aa [8041fb1f] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c] call_softirq+0x1c/0x28 [8020e739] do_softirq+0x39/0x9f [8023b5a4] irq_exit+0x4e/0x50 [8020e880] do_IRQ+0xb7/0xd7 [8020a803] mwait_idle+0x0/0x55 [8020bb66] ret_from_intr+0x0/0xf EOI [8024d623] __atomic_notifier_call_chain+0x20/0x83 
[8020a84b] mwait_idle+0x48/0x55 [80209e79] enter_idle+0x22/0x24 [8020a793] cpu_idle+0xa1/0xc5 [8021dfd5] start_secondary+0x3b9/0x3c5 WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: IRQ [804115bf] tcp_fastretrans_alert+0xb6/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [804153b8] tcp_data_queue+0x5da/0xb0a [80416d1e] tcp_rcv_established+0xf3/0x76d [8041d231] tcp_v4_do_rcv+0x37/0x3aa [8041fb1f] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c]
Re: 2.6.24-rc4-mm1
Ilpo Järvinen wrote: On Wed, 5 Dec 2007, Andrew Morton wrote: On Thu, 06 Dec 2007 17:59:37 +1100 Reuben Farrelly [EMAIL PROTECTED] wrote: This non fatal oops which I have just noticed may be related to this change then - certainly looks networking related. yep, but it isn't e1000. It's core TCP. WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 Ilpo, Reuben's kernel is talking to you ;) ...Please try the patch below. Andrew, this probably fixes your problem (the packets = tp-packets_out) as well. nah. I got the WARNINGs again with this patch. C. Dave, please include this one to net-2.6.25. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -mm 2/4] mqueue namespace : add unshare support
>> Index: 2.6.24-rc3-mm2/include/linux/sched.h >> === >> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h >> +++ 2.6.24-rc3-mm2/include/linux/sched.h >> @@ -27,6 +27,7 @@ >> #define CLONE_NEWUSER 0x1000 /* New user namespace */ >> #define CLONE_NEWPID0x2000 /* New pid namespace */ >> #define CLONE_NEWNET0x4000 /* New network >> namespace */ >> +#define CLONE_NEWMQ 0x8000 /* New posix mqueue namespace */ > > That's it :) We've run out of clone flags on 32-bit platforms :( yes. I have been giving some thought to a clone2() to extend the flags but Andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for 2.6.26. So we might have some more time in front of us. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -mm 2/4] mqueue namespace : add unshare support
>> Index: 2.6.24-rc3-mm2/kernel/fork.c >> === >> --- 2.6.24-rc3-mm2.orig/kernel/fork.c >> +++ 2.6.24-rc3-mm2/kernel/fork.c >> @@ -1004,6 +1004,13 @@ static struct task_struct *copy_process( >> if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) >> return ERR_PTR(-EINVAL); >> >> +/* >> + * mount namespace cannot be unshared when the mqueue >> + * namespace is not > > vice versa - mqueue namespace cannot be unshared when the mount one is not ;) arg. yes :) Thanks ! C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- kernel/fork.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: 2.6.24-rc3-mm2/kernel/fork.c === --- 2.6.24-rc3-mm2.orig/kernel/fork.c +++ 2.6.24-rc3-mm2/kernel/fork.c @@ -1005,7 +1005,7 @@ static struct task_struct *copy_process( return ERR_PTR(-EINVAL); /* -* mount namespace cannot be unshared when the mqueue +* mqueue namespace cannot be unshared when the mount * namespace is not */ if ((clone_flags & CLONE_NEWMQ) && !(clone_flags & CLONE_NEWNS)) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -mm 0/4] mqueue namespace
Pavel Emelyanov wrote: > Cedric Le Goater wrote: >> Hello ! >> >> Here's a small patchset introducing a new namespace for POSIX >> message queues. >> >> Nothing really complex apart from the mqueue filesystem which >> needed some special care > > Hm... Why did you decide to make it separately from the > IPC namespace? Mostly because it has its own configuration option and filesystem, which also requires cloning the mnt namespace. But yes, they could probably be merged. Let's see what the others have to say about it. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -mm 0/4] mqueue namespace
Pavel Emelyanov wrote: Cedric Le Goater wrote: Hello ! Here's a small patchset introducing a new namespace for POSIX message queues. Nothing really complex apart from the mqueue filesystem which needed some special care Hm... Why did you decide to make it separately from the IPC namespace? Mostly because it has its own configuration option and filesystem, which also requires cloning the mnt namespace. But yes, they could probably be merged. Let's see what the others have to say about it. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -mm 2/4] mqueue namespace : add unshare support
Index: 2.6.24-rc3-mm2/kernel/fork.c === --- 2.6.24-rc3-mm2.orig/kernel/fork.c +++ 2.6.24-rc3-mm2/kernel/fork.c @@ -1004,6 +1004,13 @@ static struct task_struct *copy_process( if ((clone_flags CLONE_SIGHAND) !(clone_flags CLONE_VM)) return ERR_PTR(-EINVAL); +/* + * mount namespace cannot be unshared when the mqueue + * namespace is not vice versa - mqueue namespace cannot be unshared when the mount one is not ;) arg. yes :) Thanks ! C. Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- kernel/fork.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: 2.6.24-rc3-mm2/kernel/fork.c === --- 2.6.24-rc3-mm2.orig/kernel/fork.c +++ 2.6.24-rc3-mm2/kernel/fork.c @@ -1005,7 +1005,7 @@ static struct task_struct *copy_process( return ERR_PTR(-EINVAL); /* -* mount namespace cannot be unshared when the mqueue +* mqueue namespace cannot be unshared when the mount * namespace is not */ if ((clone_flags CLONE_NEWMQ) !(clone_flags CLONE_NEWNS)) - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -mm 2/4] mqueue namespace : add unshare support
Index: 2.6.24-rc3-mm2/include/linux/sched.h === --- 2.6.24-rc3-mm2.orig/include/linux/sched.h +++ 2.6.24-rc3-mm2/include/linux/sched.h @@ -27,6 +27,7 @@ #define CLONE_NEWUSER 0x1000 /* New user namespace */ #define CLONE_NEWPID0x2000 /* New pid namespace */ #define CLONE_NEWNET0x4000 /* New network namespace */ +#define CLONE_NEWMQ 0x8000 /* New posix mqueue namespace */ That's it :) We've run out of clone flags on 32-bit platforms :( yes. I have been giving some thought to a clone2() to extend the flags but Andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for 2.6.26. So we might have some more time in front of us. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2.6.24-rc3-mm1] IPC: make struct ipc_ids static in ipc_namespace
Pierre Peiffer wrote: > > Each ipc_namespace contains a table of 3 pointers to struct ipc_ids (3 for > msg, sem and shm, structure used to store all ipcs) > These 'struct ipc_ids' are dynamically allocated for each ipc_namespace as > the ipc_namespace itself (for the init namespace, they are initialized with > pointers to static variables instead) > > It is so for historical reasons: in fact, before the use of idr to store the > ipcs, the ipcs were stored in tables of variable length, depending on the > maximum number of ipc allowed. > Now, these 'struct ipc_ids' have a fixed size. As they are allocated in any > case for each new ipc_namespace, there is no gain of memory in having them > allocated separately from the struct ipc_namespace. > > This patch proposes to make this table static in the struct ipc_namespace. > Thus, we can allocate all at once and get rid of all the code needed to > allocate and free these ipc_ids separately. It looks safe and saves quite a lot of lines. Pavel, what do you think of it ? Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks, C. 
> Signed-off-by: Pierre Peiffer <[EMAIL PROTECTED]> > --- > include/linux/ipc_namespace.h | 13 +++-- > ipc/msg.c | 26 -- > ipc/namespace.c | 25 - > ipc/sem.c | 26 -- > ipc/shm.c | 26 -- > ipc/util.c|6 +++--- > ipc/util.h| 16 > 7 files changed, 34 insertions(+), 104 deletions(-) > > Index: b/include/linux/ipc_namespace.h > === > --- a/include/linux/ipc_namespace.h > +++ b/include/linux/ipc_namespace.h > @@ -2,11 +2,20 @@ > #define __IPC_NAMESPACE_H__ > > #include > +#include > +#include > + > +struct ipc_ids { > + int in_use; > + unsigned short seq; > + unsigned short seq_max; > + struct rw_semaphore rw_mutex; > + struct idr ipcs_idr; > +}; > > -struct ipc_ids; > struct ipc_namespace { > struct kref kref; > - struct ipc_ids *ids[3]; > + struct ipc_ids ids[3]; > > int sem_ctls[4]; > int used_sems; > Index: b/ipc/msg.c > === > --- a/ipc/msg.c > +++ b/ipc/msg.c > @@ -67,9 +67,7 @@ struct msg_sender { > #define SEARCH_NOTEQUAL 3 > #define SEARCH_LESSEQUAL 4 > > -static struct ipc_ids init_msg_ids; > - > -#define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS])) > +#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) > > #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) > #define msg_buildid(id, seq) ipc_buildid(id, seq) > @@ -80,30 +78,17 @@ static int newque(struct ipc_namespace * > static int sysvipc_msg_proc_show(struct seq_file *s, void *it); > #endif > > -static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) > +void msg_init_ns(struct ipc_namespace *ns) > { > - ns->ids[IPC_MSG_IDS] = ids; > ns->msg_ctlmax = MSGMAX; > ns->msg_ctlmnb = MSGMNB; > ns->msg_ctlmni = MSGMNI; > atomic_set(>msg_bytes, 0); > atomic_set(>msg_hdrs, 0); > - ipc_init_ids(ids); > + ipc_init_ids(>ids[IPC_MSG_IDS]); > } > > #ifdef CONFIG_IPC_NS > -int msg_init_ns(struct ipc_namespace *ns) > -{ > - struct ipc_ids *ids; > - > - ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); > - if (ids == NULL) > - return -ENOMEM; > - > - __msg_init_ns(ns, ids); > - return 0; > -} > - > void msg_exit_ns(struct 
ipc_namespace *ns) > { > struct msg_queue *msq; > @@ -126,15 +111,12 @@ void msg_exit_ns(struct ipc_namespace *n > } > > up_write(_ids(ns).rw_mutex); > - > - kfree(ns->ids[IPC_MSG_IDS]); > - ns->ids[IPC_MSG_IDS] = NULL; > } > #endif > > void __init msg_init(void) > { > - __msg_init_ns(_ipc_ns, _msg_ids); > + msg_init_ns(_ipc_ns); > ipc_init_proc_interface("sysvipc/msg", > " key msqid perms cbytes > qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", > IPC_MSG_IDS, sysvipc_msg_proc_show); > Index: b/ipc/namespace.c > === > --- a/ipc/namespace.c > +++ b/ipc/namespace.c > @@ -14,35 +14,18 @@ > > static struct ip
Re: [PATCH 2.6.24-rc3-mm1] IPC: make struct ipc_ids static in ipc_namespace
Pierre Peiffer wrote: Each ipc_namespace contains a table of 3 pointers to struct ipc_ids (3 for msg, sem and shm, structure used to store all ipcs) These 'struct ipc_ids' are dynamically allocated for each icp_namespace as the ipc_namespace itself (for the init namespace, they are initialized with pointers to static variables instead) It is so for historical reason: in fact, before the use of idr to store the ipcs, the ipcs were stored in tables of variable length, depending of the maximum number of ipc allowed. Now, these 'struct ipc_ids' have a fixed size. As they are allocated in any cases for each new ipc_namespace, there is no gain of memory in having them allocated separately of the struct ipc_namespace. This patch proposes to make this table static in the struct ipc_namespace. Thus, we can allocate all in once and get rid of all the code needed to allocate and free these ipc_ids separately. It looks safe and saves quite a lot of line. Pavel, what do you think of it ? Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks, C. 
Signed-off-by: Pierre Peiffer [EMAIL PROTECTED] --- include/linux/ipc_namespace.h | 13 +++-- ipc/msg.c | 26 -- ipc/namespace.c | 25 - ipc/sem.c | 26 -- ipc/shm.c | 26 -- ipc/util.c|6 +++--- ipc/util.h| 16 7 files changed, 34 insertions(+), 104 deletions(-) Index: b/include/linux/ipc_namespace.h === --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -2,11 +2,20 @@ #define __IPC_NAMESPACE_H__ #include linux/err.h +#include linux/idr.h +#include linux/rwsem.h + +struct ipc_ids { + int in_use; + unsigned short seq; + unsigned short seq_max; + struct rw_semaphore rw_mutex; + struct idr ipcs_idr; +}; -struct ipc_ids; struct ipc_namespace { struct kref kref; - struct ipc_ids *ids[3]; + struct ipc_ids ids[3]; int sem_ctls[4]; int used_sems; Index: b/ipc/msg.c === --- a/ipc/msg.c +++ b/ipc/msg.c @@ -67,9 +67,7 @@ struct msg_sender { #define SEARCH_NOTEQUAL 3 #define SEARCH_LESSEQUAL 4 -static struct ipc_ids init_msg_ids; - -#define msg_ids(ns) (*((ns)-ids[IPC_MSG_IDS])) +#define msg_ids(ns) ((ns)-ids[IPC_MSG_IDS]) #define msg_unlock(msq) ipc_unlock((msq)-q_perm) #define msg_buildid(id, seq) ipc_buildid(id, seq) @@ -80,30 +78,17 @@ static int newque(struct ipc_namespace * static int sysvipc_msg_proc_show(struct seq_file *s, void *it); #endif -static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) +void msg_init_ns(struct ipc_namespace *ns) { - ns-ids[IPC_MSG_IDS] = ids; ns-msg_ctlmax = MSGMAX; ns-msg_ctlmnb = MSGMNB; ns-msg_ctlmni = MSGMNI; atomic_set(ns-msg_bytes, 0); atomic_set(ns-msg_hdrs, 0); - ipc_init_ids(ids); + ipc_init_ids(ns-ids[IPC_MSG_IDS]); } #ifdef CONFIG_IPC_NS -int msg_init_ns(struct ipc_namespace *ns) -{ - struct ipc_ids *ids; - - ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL); - if (ids == NULL) - return -ENOMEM; - - __msg_init_ns(ns, ids); - return 0; -} - void msg_exit_ns(struct ipc_namespace *ns) { struct msg_queue *msq; @@ -126,15 +111,12 @@ void msg_exit_ns(struct ipc_namespace *n } 
up_write(msg_ids(ns).rw_mutex); - - kfree(ns-ids[IPC_MSG_IDS]); - ns-ids[IPC_MSG_IDS] = NULL; } #endif void __init msg_init(void) { - __msg_init_ns(init_ipc_ns, init_msg_ids); + msg_init_ns(init_ipc_ns); ipc_init_proc_interface(sysvipc/msg, key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n, IPC_MSG_IDS, sysvipc_msg_proc_show); Index: b/ipc/namespace.c === --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -14,35 +14,18 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) { - int err; struct ipc_namespace *ns; - err = -ENOMEM; ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); if (ns == NULL) - goto err_mem; + return ERR_PTR(-ENOMEM); - err = sem_init_ns(ns); - if (err) - goto err_sem; - err = msg_init_ns(ns); - if (err) - goto err_msg; - err = shm_init_ns(ns); - if (err) - goto err_shm; + sem_init_ns(ns
Re: [PATCH] Isolate the UTS namespace's domainname and hostname back
Pavel Emelyanov wrote: > The commit > > commit 7d69a1f4a72b18876c99c697692b78339d491568 > Author: Cedric Le Goater <[EMAIL PROTECTED]> > Date: Sun Jul 15 23:40:58 2007 -0700 > > remove CONFIG_UTS_NS and CONFIG_IPC_NS > > accidentally removed the code that prevented the uts->hostname > and uts->domainname values from being overwritten from another > namespace. In other words, setting hostname/domainname via sysfs > (echo xxx > /proc/sys/kernel/(host|domain)name) caused the new > value to be set in init UTS namespace only. > > Return the isolation back. yep. sorry about that. I was going to send the same patch in a minute. > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks, C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Isolate the UTS namespace's domainname and hostname back
Pavel Emelyanov wrote: The commit commit 7d69a1f4a72b18876c99c697692b78339d491568 Author: Cedric Le Goater [EMAIL PROTECTED] Date: Sun Jul 15 23:40:58 2007 -0700 remove CONFIG_UTS_NS and CONFIG_IPC_NS accidentally removed the code that prevented the uts->hostname and uts->domainname values from being overwritten from another namespace. In other words, setting hostname/domainname via sysfs (echo xxx > /proc/sys/kernel/(host|domain)name) caused the new value to be set in init UTS namespace only. Return the isolation back. yep. sorry about that. I was going to send the same patch in a minute. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks, C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/5] Move the UTS namespace under the option
> diff --git a/init/Kconfig b/init/Kconfig > index fc76773..d592aa2 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -409,6 +409,13 @@ config NAMESPACES > or same user id or pid may refer to different tasks when used in > different namespaces. > > +config UTS_NS > + bool "UTS namespace" > + depends on NAMESPACES should we add a 'default y' like in 2.6.23 ? C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] A config option to compile out some namespaces code (v3)
Pavel Emelyanov wrote: > There were some questions like "do I need this on my cellphone" > in reply to different namespaces patches. Indeed, the namespaces > are not useful for most of the embedded systems, but the code > creating and releasing them weighs a lot. > > So I propose to add a config option which will help embedded > people to reduce the vmlinux size. This option simply compiles > out the namespaces cloning and releasing code *only*, but keeps > all the other logic untouched (e.g. the notion of init_ns). > > When someone tries to clone some namespace with their support > turned off, he will receive an EINVAL error. > > This patchset can save more than 2KB from the vmlinux when > turning the config option "NAMESPACES" to "n". > > I do not introduce the NAMESPACES_EXPERIMENTAL config option, that > switches all the namespaces we consider experimental, but each > namespace has its own config that can be marked with "depends on > EXPERIMENTAL" on demand. > > This is mainly done because some people consider pid namespaces broken > and will probably want to make them depend on BROKEN. In this case > we'll have to introduce the NAMESPACES_BROKEN option which is not that > good. I think the discussion finished with an 'immature' status :) > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> I'm fine with all these patches and I have a bunch of patches that depend on them already. The sooner they get in the better. Thanks Pavel ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] A config option to compile out some namespaces code (v3)
Pavel Emelyanov wrote: There were some questions like "do I need this on my cellphone" in reply to different namespaces patches. Indeed, the namespaces are not useful for most of the embedded systems, but the code creating and releasing them weighs a lot. So I propose to add a config option which will help embedded people to reduce the vmlinux size. This option simply compiles out the namespaces cloning and releasing code *only*, but keeps all the other logic untouched (e.g. the notion of init_ns). When someone tries to clone some namespace with their support turned off, he will receive an EINVAL error. This patchset can save more than 2KB from the vmlinux when turning the config option "NAMESPACES" to "n". I do not introduce the NAMESPACES_EXPERIMENTAL config option, that switches all the namespaces we consider experimental, but each namespace has its own config that can be marked with "depends on EXPERIMENTAL" on demand. This is mainly done because some people consider pid namespaces broken and will probably want to make them depend on BROKEN. In this case we'll have to introduce the NAMESPACES_BROKEN option which is not that good. I think the discussion finished with an 'immature' status :) Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] I'm fine with all these patches and I have a bunch of patches that depend on them already. The sooner they get in the better. Thanks Pavel ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/5] Move the UTS namespace under the option
diff --git a/init/Kconfig b/init/Kconfig index fc76773..d592aa2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -409,6 +409,13 @@ config NAMESPACES or same user id or pid may refer to different tasks when used in different namespaces. +config UTS_NS + bool UTS namespace + depends on NAMESPACES should we add a 'default y' like in 2.6.23 ? C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Devel] [PATCH] pidns: Place under CONFIG_EXPERIMENTAL (take 2)
Eric W. Biederman wrote: > Andrew Morton <[EMAIL PROTECTED]> writes: > >>> On Sat, 27 Oct 2007 04:04:08 +0200 Adrian Bunk <[EMAIL PROTECTED]> wrote: be happy to hear if someone has a better idea. >>> There is a difference between "complete the feature" and "early adopters >>> to start playing with the feature" on the one side, and making something >>> available in a released kernel on the other side. >>> >>> For development and playing with it it can depend on BROKEN (perhaps >>> with the dependency removed through the first -rc kernels), but as soon >>> as it's available in a -final kernel the ABI is fixed. >>> >> Yes, if we're not 100% certain that the interfaces are correnct and >> unchanging >> and that the implementation is solid, we should disable the feature at >> Kconfig >> time. > > Reasonable. So far things look good for a single pid namespace. Multiple > pid namespaces look iffy. > >> The best option would be to fix things asap. But assuming that option isn't >> reasonable and/or safe, we can slip a `depends on BROKEN' into -rc6 then >> resume development for 2.6.25. > > I think we can make a lot of progress but there is enough development > yet to do to reach the target of correct and unchanging interfaces, > with a solid interface. That unless we achieve a breakthrough I > don't see us achieving that target for 2.6.24. > > The outstanding issues I can think of off the top of my head: > - signal handling for init on secondary pid namespaces. > - Properly setting si_pid on signals that cross namespaces. these are being addressed by suka patches, and also you with the latest patch you sent. right ? > - The kthread API conversion so we don't get kernel threads > trapped in pid namespaces and make them unfreeable. a lot of work has been done on that part. take a look at it. the clean up is really impressive ! NFS still uses the kernel_thread() API. the first thing to do on the kthread topic is to improve the kthread API. 
I think we can discard the remaining drivers for the moment. > - At fork time I think we are doing a little bit too much work > in setting the session and the pgrp, and removing the controlling > tty. yes probably. this needs to be sorted out. it makes a container init process different from the system init process. > - AF_unix domain credential passing. see commit b488893a390edfe027bae7a46e9af8083e740668 which is covering UNIX socket credentials and more. Are you thinking we should do more for credentials and use a struct pid* ? This looks scary. > - misc pid vs vpid sorting out (autofs autofs4, coda, arch specific > syscalls, others?) autofs* is fixed. netlink ? > - Removal of task->pid, task->tgid, task->signal->__pgrp, > tsk->signal->__session or some other way to ensure that we have > touched and converted all of the kernel pid handling. well, __pgrp and __session are pretty well covered with the __deprecated attribute. I don't see what else we could to do on these. we can't remove the task_{session,pgrp}_* routines. we could apply the same __deprecated technique to task->pid, task->tgid. This is going to be a challenge :) > - flock pid handling. Pavel again. > It hurts me to even ponder what thinking makes it that > CONFIG_EXPERIMENTAL isn't enough to keep a stable distro > from shipping the code in their stable kernel, and locking us into > trouble. > > With that said. I think I should just respin the patchset now and add > the "depends on BROKEN". > > The user namespace appears to need that treatment as well. The kernel will be protected by a CONFIG_NAMESPACES option as soon as it gets in. Unfortunately, it didn't make 2.6.24 so this will be 2.6.25 material. Cheers, C. > The network namespace has so little there and it already depends > on !SYSFS so I don't think we are going to run into any trouble > with it. Happily I managed to parse that problem differently, > so I could slice of the parts of the networking stack that > had not been converted. 
> > Eric > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Devel] [PATCH] pidns: Place under CONFIG_EXPERIMENTAL (take 2)
Eric W. Biederman wrote: > Adrian Bunk <[EMAIL PROTECTED]> writes: > >> On Sun, Oct 28, 2007 at 09:12:34AM -0700, Jeremy Fitzhardinge wrote: >>> Eric W. Biederman wrote: Roughly that sounds like CONFIG_EXPERIMENTAL to me. But I would be happy to hear if someone has a better idea. >>> Rather than overload an existing config option, why not add one with the >>> specific semantics you want: CONFIG_UNSTABLE_UABI. The problem seems >>> like one which may occur again, though one hopes not too often. I >>> guess the risk is that people will leave their subsystems depending on >>> it permanently (sysfs?), so it ends up being set all the time and >>> becomes as useless as EXPERIMENTAL... >> Then let SYSFS depend on UNSTABLE_UABI for the next 10 years and we have >> an excuse for breaking the ABI with each new kernel... >> >> Either the ABI is stable or it should not be exposed to users at all. > > If we need a new config for it. CONFIG_IMMATURE is the closest I > can think of. Pavel also has a CONFIG_NAMESPACES patch that he should be resending to andrew when 2.6.24-rc1-mm1 is released. pidns will go under this option, like all the other namespaces, and should protect the distros from shipping any immature namespace. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Devel] [PATCH] pidns: Place under CONFIG_EXPERIMENTAL (take 2)
Eric W. Biederman wrote: Adrian Bunk [EMAIL PROTECTED] writes: On Sun, Oct 28, 2007 at 09:12:34AM -0700, Jeremy Fitzhardinge wrote: Eric W. Biederman wrote: Roughly that sounds like CONFIG_EXPERIMENTAL to me. But I would be happy to hear if someone has a better idea. Rather than overload an existing config option, why not add one with the specific semantics you want: CONFIG_UNSTABLE_UABI. The problem seems like one which may occur again, though one hopes not too often. I guess the risk is that people will leave their subsystems depending on it permanently (sysfs?), so it ends up being set all the time and becomes as useless as EXPERIMENTAL... Then let SYSFS depend on UNSTABLE_UABI for the next 10 years and we have an excuse for breaking the ABI with each new kernel... Either the ABI is stable or it should not be exposed to users at all. If we need a new config for it. CONFIG_IMMATURE is the closest I can think of. Pavel also has a CONFIG_NAMESPACES patch that he should be resending to andrew when 2.6.24-rc1-mm1 is released. pidns will go under this option, like all the other namespaces, and should protect the distros from shipping any immature namespace. C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Devel] [PATCH] pidns: Place under CONFIG_EXPERIMENTAL (take 2)
Eric W. Biederman wrote: Andrew Morton [EMAIL PROTECTED] writes: On Sat, 27 Oct 2007 04:04:08 +0200 Adrian Bunk [EMAIL PROTECTED] wrote: be happy to hear if someone has a better idea. There is a difference between complete the feature and early adopters to start playing with the feature on the one side, and making something available in a released kernel on the other side. For development and playing with it it can depend on BROKEN (perhaps with the dependency removed through the first -rc kernels), but as soon as it's available in a -final kernel the ABI is fixed. Yes, if we're not 100% certain that the interfaces are correnct and unchanging and that the implementation is solid, we should disable the feature at Kconfig time. Reasonable. So far things look good for a single pid namespace. Multiple pid namespaces look iffy. The best option would be to fix things asap. But assuming that option isn't reasonable and/or safe, we can slip a `depends on BROKEN' into -rc6 then resume development for 2.6.25. I think we can make a lot of progress but there is enough development yet to do to reach the target of correct and unchanging interfaces, with a solid interface. That unless we achieve a breakthrough I don't see us achieving that target for 2.6.24. The outstanding issues I can think of off the top of my head: - signal handling for init on secondary pid namespaces. - Properly setting si_pid on signals that cross namespaces. these are being addressed by suka patches, and also you with the latest patch you sent. right ? - The kthread API conversion so we don't get kernel threads trapped in pid namespaces and make them unfreeable. a lot of work has been done on that part. take a look at it. the clean up is really impressive ! NFS still uses the kernel_thread() API. the first thing to do on the kthread topic is to improve the kthread API. I think we can discard the remaining drivers for the moment. 
- At fork time I think we are doing a little bit too much work in setting the session and the pgrp, and removing the controlling tty. yes probably. this needs to be sorted out. it makes a container init process different from the system init process. - AF_unix domain credential passing. see commit b488893a390edfe027bae7a46e9af8083e740668 which is covering UNIX socket credentials and more. Are you thinking we should do more for credentials and use a struct pid* ? This looks scary. - misc pid vs vpid sorting out (autofs autofs4, coda, arch specific syscalls, others?) autofs* is fixed. netlink ? - Removal of task-pid, task-tgid, task-signal-__pgrp, tsk-signal-__session or some other way to ensure that we have touched and converted all of the kernel pid handling. well, __pgrp and __session are pretty well covered with the __deprecated attribute. I don't see what else we could to do on these. we can't remove the task_{session,pgrp}_* routines. we could apply the same __deprecated technique to task-pid, task-tgid. This is going to be a challenge :) - flock pid handling. Pavel again. It hurts me to even ponder what thinking makes it that CONFIG_EXPERIMENTAL isn't enough to keep a stable distro from shipping the code in their stable kernel, and locking us into trouble. With that said. I think I should just respin the patchset now and add the depends on BROKEN. The user namespace appears to need that treatment as well. The kernel will be protected by a CONFIG_NAMESPACES option as soon as it gets in. Unfortunately, it didn't make 2.6.24 so this will be 2.6.25 material. Cheers, C. The network namespace has so little there and it already depends on !SYSFS so I don't think we are going to run into any trouble with it. Happily I managed to parse that problem differently, so I could slice of the parts of the networking stack that had not been converted. 
Eric - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 s390 driver problem
>> that helped going a little further in the boot process but we then have >> a network issue when bringing the network interface up : > > please cc netdev on network issues. yes. >> Bringing up interface eth0: Ý cut here ¨ >> Kernel BUG at 0002 Ýverbose debug info unavailable¨ >> illegal operation: 0001 Ý#1¨ >> Modules linked in: >> CPU:0Not tainted >> Process ip (pid: 1167, task: 01d46038, ksp: 025efb28) >> Krnl PSW : 070420018000 0002 (0x2) >>R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 >> Krnl GPRS: 0241f600 01c8d >>86dd 01eb6d70 01c8d >>01eb6d40 003abc28 01eb6d00 025ef >>0241f600 003b6d18 002b33d2 025ef >> Krnl Code:>0002: unknown >>0004: unknown >>0006: unknown >>0008: unknown >>000a: unknown >>000c: unknown >>000e: unknown >>0010: unknown >> Call Trace: >> (Ý<002b3352>¨ neigh_connected_output+0x76/0x138) >> Ý<00325402>¨ ip6_output2+0x2da/0x470 >> Ý<00326ea6>¨ ip6_output+0x816/0x1064 >> Ý<00338e46>¨ __ndisc_send+0x416/0x6a8 >> Ý<00339330>¨ ndisc_send_rs+0x58/0x68 >> Ý<0032cbf4>¨ addrconf_dad_completed+0xbc/0x100 >> Ý<0032d2de>¨ addrconf_dad_start+0xa2/0x14c >> Ý<0032d408>¨ addrconf_add_linklocal+0x80/0xa8 >> Ý<0032fa7e>¨ addrconf_notify+0x2de/0x8d4 >> Ý<00383990>¨ notifier_call_chain+0x5c/0x98 >> Ý<00063bca>¨ __raw_notifier_call_chain+0x26/0x34 >> Ý<00063c06>¨ raw_notifier_call_chain+0x2e/0x3c >> Ý<002aa54c>¨ call_netdevice_notifiers+0x34/0x44 >> Ý<002ad1aa>¨ dev_open+0x9e/0xe0 >> Ý<002ad80a>¨ dev_change_flags+0x9e/0x1cc >> Ý<00302c74>¨ devinet_ioctl+0x650/0x73c >> Ý<003050ba>¨ inet_ioctl+0xde/0xf4 >> Ý<0029a8d0>¨ sock_ioctl+0x1cc/0x2dc >> Ý<000cb844>¨ do_ioctl+0xb8/0xcc >> Ý<000cb8f2>¨ vfs_ioctl+0x9a/0x3ec >> Ý<000cbc96>¨ sys_ioctl+0x52/0x7c >> Ý<00022484>¨ sysc_noemu+0x10/0x16 >> Ý<0210df12>¨ 0x210df12 > > that's a network issue ;) > >> 002b3352 gives : include/linux/netdevice.h:819 >> > > I have a feeling that we fixed this. But there's no BUG at 2.6.23-mm1's > include/linux/netdevice.h:819. but dev->header_ops is bogus. right ? 
> How about setting CONFIG_DEBUG_BUGVERBOSE=y? it is set :( C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 s390 driver problem
Martin Schwidefsky wrote: > On Fri, 2007-10-19 at 11:16 +0200, Cedric Le Goater wrote: >>> This is the vmlinux.lds.S problem. The cleanup patch from Sam Ravnborg >>> moved the __initramfs_start and __initramfs_end symbols into >>> the .init.ramfs section. This is in itself not a problem, but it >>> surfaced a bug: there is no *(.init.initramfs), that needs to be >>> *(init.ramfs). I corrected this in the upstream patch but 2.6.23-mm1 has >>> the older one that still causes the "Cannot open root device". For >>> 2.6.23-mm1 use the patch below. >>> >> thanks martin, >> >> that helped going a little further in the boot process but we then have >> a network issue when bringing the network interface up : > > See http://marc.info/?l=linux-kernel&m=119270398931208&w=2 hmm, that doesn't fix the oops. /me looking. Thanks, C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 s390 driver problem
Martin Schwidefsky wrote: > On Thu, 2007-10-18 at 15:31 -0500, Serge E. Hallyn wrote: >> Quoting Christian Borntraeger ([EMAIL PROTECTED]): >>> Am Donnerstag, 18. Oktober 2007 schrieb Serge E. Hallyn: Sigh, well this turned out less informative than I'd liked. After bisecting 2.6.23 to 2.6.23-mm1, I found that git-s390.patch is the one breaking my s390 boot :( (Frown bc it's a conglomeration of patches0 Symptom is: "Cannot open root device "dasdd2" or unknown-block(94,14)" even though dasdd2 appeared to be found earlier in the boot. I also get >>> Can you post the full console output from IPL to the unsuccessful end? >> Yeah, sorry, appended below. >> >> I had thought that the line >> sysctl table check failed: /sunrpc/transports .7249.14 Missing strategy >> meant that the fix referenced in http://lkml.org/lkml/2007/10/11/48 >> would fix it, but it appeared to have no effect. > > This is the vmlinux.lds.S problem. The cleanup patch from Sam Ravnborg > moved the __initramfs_start and __initramfs_end symbols into > the .init.ramfs section. This is in itself not a problem, but it > surfaced a bug: there is no *(.init.initramfs), that needs to be > *(init.ramfs). I corrected this in the upstream patch but 2.6.23-mm1 has > the older one that still causes the "Cannot open root device". For > 2.6.23-mm1 use the patch below. 
> thanks martin, that helped going a little further in the boot process but we then have a network issue when bringing the network interface up : Bringing up interface eth0: Ý cut here ¨ Kernel BUG at 0002 Ýverbose debug info unavailable¨ illegal operation: 0001 Ý#1¨ Modules linked in: CPU:0Not tainted Process ip (pid: 1167, task: 01d46038, ksp: 025efb28) Krnl PSW : 070420018000 0002 (0x2) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 Krnl GPRS: 0241f600 01c8d 86dd 01eb6d70 01c8d 01eb6d40 003abc28 01eb6d00 025ef 0241f600 003b6d18 002b33d2 025ef Krnl Code:>0002: unknown 0004: unknown 0006: unknown 0008: unknown 000a: unknown 000c: unknown 000e: unknown 0010: unknown Call Trace: (Ý<002b3352>¨ neigh_connected_output+0x76/0x138) Ý<00325402>¨ ip6_output2+0x2da/0x470 Ý<00326ea6>¨ ip6_output+0x816/0x1064 Ý<00338e46>¨ __ndisc_send+0x416/0x6a8 Ý<00339330>¨ ndisc_send_rs+0x58/0x68 Ý<0032cbf4>¨ addrconf_dad_completed+0xbc/0x100 Ý<0032d2de>¨ addrconf_dad_start+0xa2/0x14c Ý<0032d408>¨ addrconf_add_linklocal+0x80/0xa8 Ý<0032fa7e>¨ addrconf_notify+0x2de/0x8d4 Ý<00383990>¨ notifier_call_chain+0x5c/0x98 Ý<00063bca>¨ __raw_notifier_call_chain+0x26/0x34 Ý<00063c06>¨ raw_notifier_call_chain+0x2e/0x3c Ý<002aa54c>¨ call_netdevice_notifiers+0x34/0x44 Ý<002ad1aa>¨ dev_open+0x9e/0xe0 Ý<002ad80a>¨ dev_change_flags+0x9e/0x1cc Ý<00302c74>¨ devinet_ioctl+0x650/0x73c Ý<003050ba>¨ inet_ioctl+0xde/0xf4 Ý<0029a8d0>¨ sock_ioctl+0x1cc/0x2dc Ý<000cb844>¨ do_ioctl+0xb8/0xcc Ý<000cb8f2>¨ vfs_ioctl+0x9a/0x3ec Ý<000cbc96>¨ sys_ioctl+0x52/0x7c Ý<00022484>¨ sysc_noemu+0x10/0x16 Ý<0210df12>¨ 0x210df12 002b3352 gives : include/linux/netdevice.h:819 C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 s390 driver problem
Martin Schwidefsky wrote: On Fri, 2007-10-19 at 11:16 +0200, Cedric Le Goater wrote: This is the vmlinux.lds.S problem. The cleanup patch from Sam Ravnborg moved the __initramfs_start and __initramfs_end symbols into the .init.ramfs section. This is in itself not a problem, but it surfaced a bug: there is no *(.init.initramfs), that needs to be *(init.ramfs). I corrected this in the upstream patch but 2.6.23-mm1 has the older one that still causes the Cannot open root device. For 2.6.23-mm1 use the patch below. thanks martin, that helped going a little further in the boot process but we then have a network issue when bringing the network interface up : See http://marc.info/?l=linux-kernel&m=119270398931208&w=2 hmm, that doesn't fix the oops. /me looking. Thanks, C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 s390 driver problem
Martin Schwidefsky wrote: On Thu, 2007-10-18 at 15:31 -0500, Serge E. Hallyn wrote: Quoting Christian Borntraeger ([EMAIL PROTECTED]): Am Donnerstag, 18. Oktober 2007 schrieb Serge E. Hallyn: Sigh, well this turned out less informative than I'd liked. After bisecting 2.6.23 to 2.6.23-mm1, I found that git-s390.patch is the one breaking my s390 boot :( (Frown bc it's a conglomeration of patches0 Symptom is: Cannot open root device dasdd2 or unknown-block(94,14) even though dasdd2 appeared to be found earlier in the boot. I also get Can you post the full console output from IPL to the unsuccessful end? Yeah, sorry, appended below. I had thought that the line sysctl table check failed: /sunrpc/transports .7249.14 Missing strategy meant that the fix referenced in http://lkml.org/lkml/2007/10/11/48 would fix it, but it appeared to have no effect. This is the vmlinux.lds.S problem. The cleanup patch from Sam Ravnborg moved the __initramfs_start and __initramfs_end symbols into the .init.ramfs section. This is in itself not a problem, but it surfaced a bug: there is no *(.init.initramfs), that needs to be *(init.ramfs). I corrected this in the upstream patch but 2.6.23-mm1 has the older one that still causes the Cannot open root device. For 2.6.23-mm1 use the patch below. 
thanks martin, that helped going a little further in the boot process but we then have a network issue when bringing the network interface up : Bringing up interface eth0: Ý cut here ¨ Kernel BUG at 0002 Ýverbose debug info unavailable¨ illegal operation: 0001 Ý#1¨ Modules linked in: CPU:0Not tainted Process ip (pid: 1167, task: 01d46038, ksp: 025efb28) Krnl PSW : 070420018000 0002 (0x2) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 Krnl GPRS: 0241f600 01c8d 86dd 01eb6d70 01c8d 01eb6d40 003abc28 01eb6d00 025ef 0241f600 003b6d18 002b33d2 025ef Krnl Code:0002: unknown 0004: unknown 0006: unknown 0008: unknown 000a: unknown 000c: unknown 000e: unknown 0010: unknown Call Trace: (Ý002b3352¨ neigh_connected_output+0x76/0x138) Ý00325402¨ ip6_output2+0x2da/0x470 Ý00326ea6¨ ip6_output+0x816/0x1064 Ý00338e46¨ __ndisc_send+0x416/0x6a8 Ý00339330¨ ndisc_send_rs+0x58/0x68 Ý0032cbf4¨ addrconf_dad_completed+0xbc/0x100 Ý0032d2de¨ addrconf_dad_start+0xa2/0x14c Ý0032d408¨ addrconf_add_linklocal+0x80/0xa8 Ý0032fa7e¨ addrconf_notify+0x2de/0x8d4 Ý00383990¨ notifier_call_chain+0x5c/0x98 Ý00063bca¨ __raw_notifier_call_chain+0x26/0x34 Ý00063c06¨ raw_notifier_call_chain+0x2e/0x3c Ý002aa54c¨ call_netdevice_notifiers+0x34/0x44 Ý002ad1aa¨ dev_open+0x9e/0xe0 Ý002ad80a¨ dev_change_flags+0x9e/0x1cc Ý00302c74¨ devinet_ioctl+0x650/0x73c Ý003050ba¨ inet_ioctl+0xde/0xf4 Ý0029a8d0¨ sock_ioctl+0x1cc/0x2dc Ý000cb844¨ do_ioctl+0xb8/0xcc Ý000cb8f2¨ vfs_ioctl+0x9a/0x3ec Ý000cbc96¨ sys_ioctl+0x52/0x7c Ý00022484¨ sysc_noemu+0x10/0x16 Ý0210df12¨ 0x210df12 002b3352 gives : include/linux/netdevice.h:819 C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 s390 driver problem
that helped going a little further in the boot process but we then have a network issue when bringing the network interface up : please cc netdev on network issues. yes. Bringing up interface eth0: Ý cut here ¨ Kernel BUG at 0002 Ýverbose debug info unavailable¨ illegal operation: 0001 Ý#1¨ Modules linked in: CPU:0Not tainted Process ip (pid: 1167, task: 01d46038, ksp: 025efb28) Krnl PSW : 070420018000 0002 (0x2) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 Krnl GPRS: 0241f600 01c8d 86dd 01eb6d70 01c8d 01eb6d40 003abc28 01eb6d00 025ef 0241f600 003b6d18 002b33d2 025ef Krnl Code:0002: unknown 0004: unknown 0006: unknown 0008: unknown 000a: unknown 000c: unknown 000e: unknown 0010: unknown Call Trace: (Ý002b3352¨ neigh_connected_output+0x76/0x138) Ý00325402¨ ip6_output2+0x2da/0x470 Ý00326ea6¨ ip6_output+0x816/0x1064 Ý00338e46¨ __ndisc_send+0x416/0x6a8 Ý00339330¨ ndisc_send_rs+0x58/0x68 Ý0032cbf4¨ addrconf_dad_completed+0xbc/0x100 Ý0032d2de¨ addrconf_dad_start+0xa2/0x14c Ý0032d408¨ addrconf_add_linklocal+0x80/0xa8 Ý0032fa7e¨ addrconf_notify+0x2de/0x8d4 Ý00383990¨ notifier_call_chain+0x5c/0x98 Ý00063bca¨ __raw_notifier_call_chain+0x26/0x34 Ý00063c06¨ raw_notifier_call_chain+0x2e/0x3c Ý002aa54c¨ call_netdevice_notifiers+0x34/0x44 Ý002ad1aa¨ dev_open+0x9e/0xe0 Ý002ad80a¨ dev_change_flags+0x9e/0x1cc Ý00302c74¨ devinet_ioctl+0x650/0x73c Ý003050ba¨ inet_ioctl+0xde/0xf4 Ý0029a8d0¨ sock_ioctl+0x1cc/0x2dc Ý000cb844¨ do_ioctl+0xb8/0xcc Ý000cb8f2¨ vfs_ioctl+0x9a/0x3ec Ý000cbc96¨ sys_ioctl+0x52/0x7c Ý00022484¨ sysc_noemu+0x10/0x16 Ý0210df12¨ 0x210df12 that's a network issue ;) 002b3352 gives : include/linux/netdevice.h:819 I have a feeling that we fixed this. But there's no BUG at 2.6.23-mm1's include/linux/netdevice.h:819. but dev-header_ops is bogus. right ? How about setting CONFIG_DEBUG_BUGVERBOSE=y? it is set :( C. 
- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 - list_add corruption in cgroup
Hello ! While polling the contents of a cgroup task file, I caught the following corruption. Is there a known race (and a fix) or should I start digging ? the program running in the cgroup is fork/exec intensive: while (1) { int i, s; for (i = 0; i < count; i++) if (fork() == 0) execlp("/bin/true", "true", 0); for (i = 0; i < count; i++) wait(&s); } Thanks for any insights, C. list_add corruption. next->prev should be prev (80a3f338), but was 00200200. (next=810103dcbe90). [ cut here ] kernel BUG at /home/legoater/linux/2.6.23-mm1/lib/list_debug.c:27! invalid opcode: [1] SMP last sysfs file: /devices/pci:00/:00:1e.0/:01:01.0/local_cpus CPU 3 Modules linked in: ipt_REJECT iptable_filter autofs4 nfs lockd sunrpc tg3 sg joydev ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 2441, comm: bash Not tainted 2.6.23-mm1 #4 RIP: 0010:[] [] __list_add+0x27/0x5b RSP: 0018:810103d87dd8 EFLAGS: 00010296 RAX: 0079 RBX: 810105033040 RCX: 0079 RDX: 810103d960c0 RSI: 0001 RDI: 0096 RBP: 810103d87dd8 R08: 0002 R09: 810008123780 R10: R11: 810103d87a98 R12: R13: 810105033040 R14: 810104c11ac0 R15: FS: 7f4e273556f0() GS:81010011a840() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 006ca2f8 CR3: 000103d82000 CR4: 06e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process bash (pid: 2441, threadinfo 810103d86000, task 810103d960c0) last branch before last exception/interrupt from [] printk+0x68/0x69 to [] __list_add+0x27/0x5b Stack: 810103d87de8 80308d1a 810103d87e08 802606bf 810103d87e08 810103d87ea8 80233dca 810103ddf340 7f4e27355780 810103d87f58 Call Trace: [] list_add+0xc/0xe [] cgroup_post_fork+0x41/0x52 [] copy_process+0x12d0/0x143a [] tracesys+0xdc/0xe1 [] do_fork+0x76/0x203 [] audit_syscall_entry+0x148/0x17e [] tracesys+0xdc/0xe1 [] sys_clone+0x23/0x25 [] ptregscall_common+0x67/0xb0 INFO: lockdep is turned off. Code: 0f 0b eb fe 4c 8b 00 49 39 f0 74 18 48 89 c1 4c 89 c2 48 c7 RIP [] __list_add+0x27/0x5b RSP BUG: soft lockup - CPU#1 stuck for 11s!
[true:2030] CPU 1: Modules linked in: ipt_REJECT iptable_filter autofs4 nfs lockd sunrpc tg3 sg joydev ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 2030, comm: true Tainted: G D 2.6.23-mm1 #4 RIP: 0010:[] [] __write_lock_failed+0xf/0x20 RSP: 0018:81010513be80 EFLAGS: 0287 RAX: 0001 RBX: 81010513be98 RCX: 807d8d60 RDX: 0037 RSI: 0037 RDI: 805beac0 RBP: 81010289e040 R08: R09: R10: 8026072c R11: 81010513be08 R12: 81000812c300 R13: 81010289e040 R14: 81010513a000 R15: 810087acb000 FS: () GS:8101000560c0() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 7f8171b028b0 CR3: 00201000 CR4: 06e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Call Trace: [] _raw_write_lock+0x6c/0x8b [] cgroup_exit+0x5c/0xc3 [] _write_lock+0x2d/0x31 [] cgroup_exit+0x5c/0xc3 [] do_exit+0x2a0/0x7a5 [] sys_exit_group+0x0/0x14 [] sys_exit_group+0x12/0x14 [] tracesys+0xdc/0xe1 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 - list_add corruption in cgroup
Hello ! While polling the contents of a cgroup task file, I caught the following corruption. Is there a known race (and a fix) or should I start digging ? the program running in the cgroup is fork/exec intensive: while (1) { int i, s; for (i = 0; i < count; i++) if (fork() == 0) execlp("/bin/true", "true", 0); for (i = 0; i < count; i++) wait(&s); } Thanks for any insights, C. list_add corruption. next->prev should be prev (80a3f338), but was 00200200. (next=810103dcbe90). [ cut here ] kernel BUG at /home/legoater/linux/2.6.23-mm1/lib/list_debug.c:27! invalid opcode: [1] SMP last sysfs file: /devices/pci:00/:00:1e.0/:01:01.0/local_cpus CPU 3 Modules linked in: ipt_REJECT iptable_filter autofs4 nfs lockd sunrpc tg3 sg joydev ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 2441, comm: bash Not tainted 2.6.23-mm1 #4 RIP: 0010:[80308cda] [80308cda] __list_add+0x27/0x5b RSP: 0018:810103d87dd8 EFLAGS: 00010296 RAX: 0079 RBX: 810105033040 RCX: 0079 RDX: 810103d960c0 RSI: 0001 RDI: 0096 RBP: 810103d87dd8 R08: 0002 R09: 810008123780 R10: R11: 810103d87a98 R12: R13: 810105033040 R14: 810104c11ac0 R15: FS: 7f4e273556f0() GS:81010011a840() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 006ca2f8 CR3: 000103d82000 CR4: 06e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process bash (pid: 2441, threadinfo 810103d86000, task 810103d960c0) last branch before last exception/interrupt from [80235885] printk+0x68/0x69 to [80308cda] __list_add+0x27/0x5b Stack: 810103d87de8 80308d1a 810103d87e08 802606bf 810103d87e08 810103d87ea8 80233dca 810103ddf340 7f4e27355780 810103d87f58 Call Trace: [80308d1a] list_add+0xc/0xe [802606bf] cgroup_post_fork+0x41/0x52 [80233dca] copy_process+0x12d0/0x143a [8020b9b5] tracesys+0xdc/0xe1 [80234095] do_fork+0x76/0x203 [802679cc] audit_syscall_entry+0x148/0x17e [8020b9b5] tracesys+0xdc/0xe1 [80209dd5] sys_clone+0x23/0x25 [8020bb67] ptregscall_common+0x67/0xb0 INFO: lockdep is turned off.
Code: 0f 0b eb fe 4c 8b 00 49 39 f0 74 18 48 89 c1 4c 89 c2 48 c7 RIP [80308cda] __list_add+0x27/0x5b RSP 810103d87dd8 BUG: soft lockup - CPU#1 stuck for 11s! [true:2030] CPU 1: Modules linked in: ipt_REJECT iptable_filter autofs4 nfs lockd sunrpc tg3 sg joydev ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 2030, comm: true Tainted: G D 2.6.23-mm1 #4 RIP: 0010:[80306baf] [80306baf] __write_lock_failed+0xf/0x20 RSP: 0018:81010513be80 EFLAGS: 0287 RAX: 0001 RBX: 81010513be98 RCX: 807d8d60 RDX: 0037 RSI: 0037 RDI: 805beac0 RBP: 81010289e040 R08: R09: R10: 8026072c R11: 81010513be08 R12: 81000812c300 R13: 81010289e040 R14: 81010513a000 R15: 810087acb000 FS: () GS:8101000560c0() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: 7f8171b028b0 CR3: 00201000 CR4: 06e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Call Trace: [80308a1d] _raw_write_lock+0x6c/0x8b [8026072c] cgroup_exit+0x5c/0xc3 [80474803] _write_lock+0x2d/0x31 [8026072c] cgroup_exit+0x5c/0xc3 [802383c1] do_exit+0x2a0/0x7a5 [80238955] sys_exit_group+0x0/0x14 [80238967] sys_exit_group+0x12/0x14 [8020b9b5] tracesys+0xdc/0xe1 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1
KAMEZAWA Hiroyuki wrote: > On Thu, 11 Oct 2007 21:31:26 -0700 > Andrew Morton <[EMAIL PROTECTED]> wrote: > >> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ >> >> - I've been largely avoiding applying anything since rc8-mm2 in an attempt >> to stabilise things for the 2.6.23 merge. >> > On RHEL5/x86_64 environment, > > == > [EMAIL PROTECTED] ref-2.6.23-mm1]$ make menuconfig > Makefile:456: /home/kamezawa/ref-2.6.23-mm1/arch//Makefile: No such file or > directory > make: *** No rule to make target > `/home/kamezawa/ref-2.6.23-mm1/arch//Makefile'. Stop. > == > > $(ARCH) cannot be detected automatically... > > What information is useful for fixing this ? cross compile work but native compile doesn't anymore :( Here's a tmp fix. Thanks, C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- Makefile |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: 2.6.23-mm1/Makefile === --- 2.6.23-mm1.orig/Makefile +++ 2.6.23-mm1/Makefile @@ -191,7 +191,7 @@ SUBARCH := $(shell uname -m | sed -e s/i # The empty ARCH and CROSS_COMPILE statements exist so it is easy to # patch in hardcoded values for ARCH and CROSS_COMPILE -ARCH ?= +ARCH ?= $(SUBARCH) CROSS_COMPILE ?= # Kbuild save the ARCH and CROSS_COMPILE setting in .kbuild > > Thanks, > -Kame > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1
KAMEZAWA Hiroyuki wrote: On Thu, 11 Oct 2007 21:31:26 -0700 Andrew Morton [EMAIL PROTECTED] wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ - I've been largely avoiding applying anything since rc8-mm2 in an attempt to stabilise things for the 2.6.23 merge. On RHEL5/x86_64 environment, == [EMAIL PROTECTED] ref-2.6.23-mm1]$ make menuconfig Makefile:456: /home/kamezawa/ref-2.6.23-mm1/arch//Makefile: No such file or directory make: *** No rule to make target `/home/kamezawa/ref-2.6.23-mm1/arch//Makefile'. Stop. == $(ARCH) cannot be detected automatically... What information is useful for fixing this ? cross compile work but native compile doesn't anymore :( Here's a tmp fix. Thanks, C. Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- Makefile |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: 2.6.23-mm1/Makefile === --- 2.6.23-mm1.orig/Makefile +++ 2.6.23-mm1/Makefile @@ -191,7 +191,7 @@ SUBARCH := $(shell uname -m | sed -e s/i # The empty ARCH and CROSS_COMPILE statements exist so it is easy to # patch in hardcoded values for ARCH and CROSS_COMPILE -ARCH ?= +ARCH ?= $(SUBARCH) CROSS_COMPILE ?= # Kbuild save the ARCH and CROSS_COMPILE setting in .kbuild Thanks, -Kame - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Consolidate IPC namespace cleanup functions
Pavel Emelyanov wrote: > When the IPC namespace is terminated all the IPC objects (i.e. ids) > living in it are freed. This is done in a similar way in X_exit_ns() > functions. All the code can be consolidated, saving 122 bytes when > the NAMESPACES are on. > > This patch must be applied after the ones with the NAMESPACES config > option introduced. > > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks Pavel ! C. > > --- > > ipc/msg.c | 23 --- > ipc/namespace.c | 39 +++ > ipc/sem.c | 23 --- > ipc/shm.c | 23 --- > ipc/util.h |6 +++--- > 5 files changed, 54 insertions(+), 60 deletions(-) > > diff --git a/ipc/namespace.c b/ipc/namespace.c > index cef1139..98de4e5 100644 > --- a/ipc/namespace.c > +++ b/ipc/namespace.c > @@ -12,6 +12,45 @@ > > #include "util.h" > > +static void ipc_exit_ns(struct ipc_namespace *ns, struct ipc_ids *ids, > + void (*free_fn)(struct ipc_namespace *ns, void *id)) > +{ > + void *id; > + int next_id; > + int total, in_use; > + > + mutex_lock(&ids->mutex); > + > + in_use = ids->in_use; > + > + for (total = 0, next_id = 0; total < in_use; next_id++) { > + id = idr_find(&ids->ipcs_idr, next_id); > + if (id == NULL) > + continue; > + > + free_fn(ns, id); > + total++; > + } > + mutex_unlock(&ids->mutex); > + > + kfree(ids); > +} > + > +static inline void sem_exit_ns(struct ipc_namespace *ns) > +{ > + ipc_exit_ns(ns, ns->ids[IPC_SEM_IDS], sem_free); > +} > + > +static inline void msg_exit_ns(struct ipc_namespace *ns) > +{ > + ipc_exit_ns(ns, ns->ids[IPC_MSG_IDS], msg_free); > +} > + > +static inline void shm_exit_ns(struct ipc_namespace *ns) > +{ > + ipc_exit_ns(ns, ns->ids[IPC_SHM_IDS], shm_free); > +} > + > static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) > { > int err; > diff --git a/ipc/sem.c b/ipc/sem.c > index 2e9f449..8027a30 100644 > --- a/ipc/sem.c > +++ b/ipc/sem.c > @@ -144,28 +144,13 @@ int sem_init_ns(struct ipc_namespace *ns > return 0; > } > > -void sem_exit_ns(struct
ipc_namespace *ns) > +void sem_free(struct ipc_namespace *ns, void *id) > { > struct sem_array *sma; > - int next_id; > - int total, in_use; > > - mutex_lock(&sem_ids(ns).mutex); > - > - in_use = sem_ids(ns).in_use; > - > - for (total = 0, next_id = 0; total < in_use; next_id++) { > - sma = idr_find(&sem_ids(ns).ipcs_idr, next_id); > - if (sma == NULL) > - continue; > - ipc_lock_by_ptr(&sma->sem_perm); > - freeary(ns, sma); > - total++; > - } > - mutex_unlock(&sem_ids(ns).mutex); > - > - kfree(ns->ids[IPC_SEM_IDS]); > - ns->ids[IPC_SEM_IDS] = NULL; > + sma = (struct sem_array *)id; > + ipc_lock_by_ptr(&sma->sem_perm); > + freeary(ns, sma); > } > #endif > > diff --git a/ipc/msg.c b/ipc/msg.c > index eb74965..9b8a155 100644 > --- a/ipc/msg.c > +++ b/ipc/msg.c > @@ -106,28 +106,13 @@ int msg_init_ns(struct ipc_namespace *ns > return 0; > } > > -void msg_exit_ns(struct ipc_namespace *ns) > +void msg_free(struct ipc_namespace *ns, void *id) > { > struct msg_queue *msq; > - int next_id; > - int total, in_use; > > - mutex_lock(&msg_ids(ns).mutex); > - > - in_use = msg_ids(ns).in_use; > - > - for (total = 0, next_id = 0; total < in_use; next_id++) { > - msq = idr_find(&msg_ids(ns).ipcs_idr, next_id); > - if (msq == NULL) > - continue; > - ipc_lock_by_ptr(&msq->q_perm); > - freeque(ns, msq); > - total++; > - } > - mutex_unlock(&msg_ids(ns).mutex); > - > - kfree(ns->ids[IPC_MSG_IDS]); > - ns->ids[IPC_MSG_IDS] = NULL; > + msq = (struct msg_queue *)id; > + ipc_lock_by_ptr(&msq->q_perm); > + freeque(ns, msq); > } > #endif > > diff --git a/ipc/shm.c b/ipc/shm.c > index 2717cbc..8f50166 100644 > --- a/ipc/shm.c > +++ b/ipc/shm.c > @@ -111,28 +111,13 @@ int shm_init_ns(struct ipc_namespace *ns > return 0; > } > > -void shm_exit_ns(struct ipc_namespace *ns) > +void shm_free(struct ipc_namespace *ns, void *id) > { > struct shmid_kernel *shp; > - int next_id; > - int total, in_us
Re: [PATCH] Consolidate IPC namespace cleanup functions
Pavel Emelyanov wrote: When the IPC namespace is terminated all the IPC objects (i.e. ids) living in it are freed. This is done in a similar way in X_exit_ns() functions. All the code can be consolidated, saving 122 bytes when the NAMESPACES are on. This patch must be applied after the ones with the NAMESPACES config option introduced. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks Pavel ! C. --- ipc/msg.c | 23 --- ipc/namespace.c | 39 +++ ipc/sem.c | 23 --- ipc/shm.c | 23 --- ipc/util.h |6 +++--- 5 files changed, 54 insertions(+), 60 deletions(-) diff --git a/ipc/namespace.c b/ipc/namespace.c index cef1139..98de4e5 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -12,6 +12,45 @@ #include util.h +static void ipc_exit_ns(struct ipc_namespace *ns, struct ipc_ids *ids, + void (*free_fn)(struct ipc_namespace *ns, void *id)) +{ + void *id; + int next_id; + int total, in_use; + + mutex_lock(ids-mutex); + + in_use = ids-in_use; + + for (total = 0, next_id = 0; total in_use; next_id++) { + id = idr_find(ids-ipcs_idr, next_id); + if (id == NULL) + continue; + + free_fn(ns, id); + total++; + } + mutex_unlock(ids-mutex); + + kfree(ids); +} + +static inline void sem_exit_ns(struct ipc_namespace *ns) +{ + ipc_exit_ns(ns, ns-ids[IPC_SEM_IDS], sem_free); +} + +static inline void msg_exit_ns(struct ipc_namespace *ns) +{ + ipc_exit_ns(ns, ns-ids[IPC_MSG_IDS], msg_free); +} + +static inline void shm_exit_ns(struct ipc_namespace *ns) +{ + ipc_exit_ns(ns, ns-ids[IPC_SHM_IDS], shm_free); +} + static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) { int err; diff --git a/ipc/sem.c b/ipc/sem.c index 2e9f449..8027a30 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -144,28 +144,13 @@ int sem_init_ns(struct ipc_namespace *ns return 0; } -void sem_exit_ns(struct ipc_namespace *ns) +void sem_free(struct ipc_namespace *ns, void *id) { struct sem_array *sma; - int next_id; - int total, in_use; - mutex_lock(sem_ids(ns).mutex); 
- - in_use = sem_ids(ns).in_use; - - for (total = 0, next_id = 0; total in_use; next_id++) { - sma = idr_find(sem_ids(ns).ipcs_idr, next_id); - if (sma == NULL) - continue; - ipc_lock_by_ptr(sma-sem_perm); - freeary(ns, sma); - total++; - } - mutex_unlock(sem_ids(ns).mutex); - - kfree(ns-ids[IPC_SEM_IDS]); - ns-ids[IPC_SEM_IDS] = NULL; + sma = (struct sem_array *)id; + ipc_lock_by_ptr(sma-sem_perm); + freeary(ns, sma); } #endif diff --git a/ipc/msg.c b/ipc/msg.c index eb74965..9b8a155 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -106,28 +106,13 @@ int msg_init_ns(struct ipc_namespace *ns return 0; } -void msg_exit_ns(struct ipc_namespace *ns) +void msg_free(struct ipc_namespace *ns, void *id) { struct msg_queue *msq; - int next_id; - int total, in_use; - mutex_lock(msg_ids(ns).mutex); - - in_use = msg_ids(ns).in_use; - - for (total = 0, next_id = 0; total in_use; next_id++) { - msq = idr_find(msg_ids(ns).ipcs_idr, next_id); - if (msq == NULL) - continue; - ipc_lock_by_ptr(msq-q_perm); - freeque(ns, msq); - total++; - } - mutex_unlock(msg_ids(ns).mutex); - - kfree(ns-ids[IPC_MSG_IDS]); - ns-ids[IPC_MSG_IDS] = NULL; + msq = (struct msg_queue *)id; + ipc_lock_by_ptr(msq-q_perm); + freeque(ns, msq); } #endif diff --git a/ipc/shm.c b/ipc/shm.c index 2717cbc..8f50166 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -111,28 +111,13 @@ int shm_init_ns(struct ipc_namespace *ns return 0; } -void shm_exit_ns(struct ipc_namespace *ns) +void shm_free(struct ipc_namespace *ns, void *id) { struct shmid_kernel *shp; - int next_id; - int total, in_use; - - mutex_lock(shm_ids(ns).mutex); - - in_use = shm_ids(ns).in_use; - - for (total = 0, next_id = 0; total in_use; next_id++) { - shp = idr_find(shm_ids(ns).ipcs_idr, next_id); - if (shp == NULL) - continue; - ipc_lock_by_ptr(shp-shm_perm); - do_shm_rmid(ns, shp); - total++; - } - mutex_unlock(shm_ids(ns).mutex); - kfree(ns-ids[IPC_SHM_IDS]); - ns-ids[IPC_SHM_IDS] = NULL; + shp = (struct shmid_kernel *)id; + ipc_lock_by_ptr(shp-shm_perm); + 
do_shm_rmid(ns, shp); } #endif diff --git a/ipc/util.h b/ipc/util.h index 8972402
Re: [PATCH 3/5] Move the IPC namespace under the option
Pavel Emelyanov wrote: > Currently all the IPC namespace management code is in > ipc/util.c. I moved this code into ipc/namespace.c file > which is compiled out when needed. > > The linux/ipc_namespace.h file is used to store the > prototypes of the functions in namespace.c and the stubs > for NAMESPACES=n case. This is done so, because the stub > for copy_ipc_namespace requires the knowledge of the > CLONE_NEWIPC flag, which is in sched.h. But the linux/ipc.h > file itself is included into many many .c files via the > sys.h->sem.h sequence so adding the sched.h into it will > make all these .c depend on sched.h which is not that good. > On the other hand the knowledge about the namespaces stuff > is required in 4 .c files only. > > Besides, this patch compiles out some auxiliary functions > from ipc/sem.c, msg.c and shm.c files. It turned out that > moving these functions into namespace.c is not that easy > because they use many other calls and macros from the original > file. Moving them would make this patch complicated. On the > other hand all these functions can be consolidated, so I > will make it separately a bit later. > > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> Fine with me. Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc8-mm2 - tcp_fastretrans_alert() WARNING
Ilpo Järvinen wrote: > On Sat, 29 Sep 2007, Cedric Le Goater wrote: > >> Ilpo Järvinen wrote: >>> On Fri, 28 Sep 2007, Ilpo Järvinen wrote: >>>> On Fri, 28 Sep 2007, Cedric Le Goater wrote: >>>> >>>>> I just found that warning in my logs. It seems that it's been >>>>> happening since rc7-mm1 at least. >>>>> >>>>> WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 >>>>> tcp_fastretrans_alert() >>>>> >>>>> Call Trace: >>>>>[] tcp_ack+0xcd6/0x1894 >>>>> ...snip... >>>> ...Thanks for the report, I'll have look what could still break >>>> fackets_out... >>> I think this one is now clear to me, tcp_fragment/collapse adjusts >>> fackets_out (incorrectly) also for reno flow when there were some dupACKs >>> that made sacked_out != 0. Could you please try if patch below proves all >>> them to be of non-SACK origin... In case that's true, it's rather >>> harmless, I'll send a fix on Monday or so (this would anyway be needed)... >>> If you find out that them occur with SACK enabled flow, that would be >>> more interesting and requires more digging... >> I'm trying now to reproduce this WARNING. >> >> It seems that the n/w behaves differently during the week ends. Probably >> taking a break. > > Thanks. > > Of course there are other means too to determine if TCP flows do negotiate > SACK enabled or not. Depending on your test case (which is fully unknown > to me) they may or may not be usable... At least the value of tcp_sack > sysctl on both systems or tcpdump catching SYN packets should give that > detail. ...If you know to which hosts TCP could be connected (and active) > to, while the WARNING triggers, it's really easy to test what is being > negotiated as it's unlikely to change at short notice and any TCP flow to > that host will get us the same information though the WARNING would not be > triggered with it at this time. 
Obviously if at least one of the remotes > is not known or the set ends up being mixture of reno and SACK flows, then > we'll just have to wait and see which fish we get... got it ! r3-06.test.meiosys.com login: WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 tcp_fastretrans_alert() Call Trace: [] tcp_ack+0xcd6/0x18af [] tcp_rcv_established+0x61f/0x6df [] __lock_acquire+0x8a1/0xf1b [] tcp_v4_do_rcv+0x3e/0x394 [] tcp_v4_rcv+0x61c/0x9a9 [] ip_local_deliver+0x1da/0x2a4 [] ip_rcv+0x583/0x5c9 [] packet_rcv_spkt+0x19a/0x1a8 [] netif_receive_skb+0x2cf/0x2f5 [] :tg3:tg3_poll+0x65d/0x8a4 [] net_rx_action+0xb8/0x191 [] __do_softirq+0x5f/0xe0 [] call_softirq+0x1c/0x28 [] do_softirq+0x3b/0xb8 [] irq_exit+0x4e/0x50 [] do_IRQ+0xbd/0xd7 [] mwait_idle+0x0/0x4d [] ret_from_intr+0x0/0xf [] mwait_idle+0x43/0x4d [] enter_idle+0x22/0x24 [] cpu_idle+0x9d/0xc0 [] rest_init+0x55/0x57 [] start_kernel+0x2d6/0x2e2 [] _sinittext+0x134/0x13b TCP 0 I wasn't doing any particular test on n/w so it took me a while to figure out how I was triggering the WARNING. Apparently, this is happening when I run ketchup, but not always. This test machine is behind many firewall & routers so it might be a reason. tcpdump gave me this output for a wget on kernel.org : 10:51:14.835981 IP r3-06.test.meiosys.com.40322 > pub2.kernel.org.http: S 737836267:737836267(0) win 5840 10:51:14.975153 IP pub2.kernel.org.http > r3-06.test.meiosys.com.40321: F 524:524(0) ack 166 win 5840 10:51:14.975177 IP r3-06.test.meiosys.com.40321 > pub2.kernel.org.http: . ack 525 win 7504 I'm trying to get the WARNING and the tcpdump output for it but for the moment, it seems it's beyond my reach :/ Hope it helps ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc8-mm2 - tcp_fastretrans_alert() WARNING
Ilpo Järvinen wrote: On Sat, 29 Sep 2007, Cedric Le Goater wrote: Ilpo Järvinen wrote: On Fri, 28 Sep 2007, Ilpo Järvinen wrote: On Fri, 28 Sep 2007, Cedric Le Goater wrote: I just found that warning in my logs. It seems that it's been happening since rc7-mm1 at least. WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 tcp_fastretrans_alert() Call Trace: IRQ [8040fdc3] tcp_ack+0xcd6/0x1894 ...snip... ...Thanks for the report, I'll have look what could still break fackets_out... I think this one is now clear to me, tcp_fragment/collapse adjusts fackets_out (incorrectly) also for reno flow when there were some dupACKs that made sacked_out != 0. Could you please try if patch below proves all them to be of non-SACK origin... In case that's true, it's rather harmless, I'll send a fix on Monday or so (this would anyway be needed)... If you find out that them occur with SACK enabled flow, that would be more interesting and requires more digging... I'm trying now to reproduce this WARNING. It seems that the n/w behaves differently during the week ends. Probably taking a break. Thanks. Of course there are other means too to determine if TCP flows do negotiate SACK enabled or not. Depending on your test case (which is fully unknown to me) they may or may not be usable... At least the value of tcp_sack sysctl on both systems or tcpdump catching SYN packets should give that detail. ...If you know to which hosts TCP could be connected (and active) to, while the WARNING triggers, it's really easy to test what is being negotiated as it's unlikely to change at short notice and any TCP flow to that host will get us the same information though the WARNING would not be triggered with it at this time. Obviously if at least one of the remotes is not known or the set ends up being mixture of reno and SACK flows, then we'll just have to wait and see which fish we get... got it ! 
r3-06.test.meiosys.com login: WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 tcp_fastretrans_alert() Call Trace: IRQ [8040fdc3] tcp_ack+0xcd6/0x18af [80412b6f] tcp_rcv_established+0x61f/0x6df [80254146] __lock_acquire+0x8a1/0xf1b [80419d19] tcp_v4_do_rcv+0x3e/0x394 [8041a68b] tcp_v4_rcv+0x61c/0x9a9 [803ff1e3] ip_local_deliver+0x1da/0x2a4 [803ffb4e] ip_rcv+0x583/0x5c9 [8046d35b] packet_rcv_spkt+0x19a/0x1a8 [803e081c] netif_receive_skb+0x2cf/0x2f5 [88042505] :tg3:tg3_poll+0x65d/0x8a4 [803e09e8] net_rx_action+0xb8/0x191 [8023a927] __do_softirq+0x5f/0xe0 [8020c98c] call_softirq+0x1c/0x28 [8020e9c3] do_softirq+0x3b/0xb8 [8023aa1e] irq_exit+0x4e/0x50 [8020e7df] do_IRQ+0xbd/0xd7 [80209cb9] mwait_idle+0x0/0x4d [8020bce6] ret_from_intr+0x0/0xf EOI [80209cfc] mwait_idle+0x43/0x4d [802099fb] enter_idle+0x22/0x24 [80209c4f] cpu_idle+0x9d/0xc0 [80476aa1] rest_init+0x55/0x57 [80630815] start_kernel+0x2d6/0x2e2 [80630134] _sinittext+0x134/0x13b TCP 0 I wasn't doing any particular test on n/w so it took me a while to figure out how I was triggering the WARNING. Apparently, this is happening when I run ketchup, but not always. This test machine is behind many firewall routers so it might be a reason. tcpdump gave me this output for a wget on kernel.org : 10:51:14.835981 IP r3-06.test.meiosys.com.40322 pub2.kernel.org.http: S 737836267:737836267(0) win 5840 mss 1460,sackOK,timestamp 1309245 0,nop,wscale 7 10:51:14.975153 IP pub2.kernel.org.http r3-06.test.meiosys.com.40321: F 524:524(0) ack 166 win 5840 10:51:14.975177 IP r3-06.test.meiosys.com.40321 pub2.kernel.org.http: . ack 525 win 7504 I'm trying to get the WARNING and the tcpdump output for it but for the moment, it seems it's beyond my reach :/ Hope it helps ! C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] Move the IPC namespace under the option
Pavel Emelyanov wrote: Currently all the IPC namespace management code is in ipc/util.c. I moved this code into ipc/namespace.c file which is compiled out when needed. The linux/ipc_namespace.h file is used to store the prototypes of the functions in namespace.c and the stubs for NAMESPACES=n case. This is done so, because the stub for copy_ipc_namespace requires the knowledge of the CLONE_NEWIPC flag, which is in sched.h. But the linux/ipc.h file itself is included into many many .c files via the sys.h->sem.h sequence so adding the sched.h into it will make all these .c depend on sched.h which is not that good. On the other hand the knowledge about the namespaces stuff is required in 4 .c files only. Besides, this patch compiles out some auxiliary functions from ipc/sem.c, msg.c and shm.c files. It turned out that moving these functions into namespace.c is not that easy because they use many other calls and macros from the original file. Moving them would make this patch complicated. On the other hand all these functions can be consolidated, so I will make it separately a bit later. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Fine with me. Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks ! C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc8-mm2 - tcp_fastretrans_alert() WARNING
Ilpo Järvinen wrote: > On Fri, 28 Sep 2007, Ilpo Järvinen wrote: >> On Fri, 28 Sep 2007, Cedric Le Goater wrote: >> >>> I just found that warning in my logs. It seems that it's been >>> happening since rc7-mm1 at least. >>> >>> WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 >>> tcp_fastretrans_alert() >>> >>> Call Trace: >>>[] tcp_ack+0xcd6/0x1894 >>> ...snip... >> ...Thanks for the report, I'll have a look at what could still break >> fackets_out... > > I think this one is now clear to me, tcp_fragment/collapse adjusts > fackets_out (incorrectly) also for reno flow when there were some dupACKs > that made sacked_out != 0. Could you please try if the patch below proves all of > them to be of non-SACK origin... In case that's true, it's rather > harmless, I'll send a fix on Monday or so (this would anyway be needed)... > If you find out that they occur with SACK enabled flow, that would be > more interesting and requires more digging... I'm trying now to reproduce this WARNING. It seems that the n/w behaves differently during the weekends. Probably taking a break. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc8-mm2 - tcp_fastretrans_alert() WARNING
Ilpo Järvinen wrote: On Fri, 28 Sep 2007, Ilpo Järvinen wrote: On Fri, 28 Sep 2007, Cedric Le Goater wrote: I just found that warning in my logs. It seems that it's been happening since rc7-mm1 at least. WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 tcp_fastretrans_alert() Call Trace: IRQ [8040fdc3] tcp_ack+0xcd6/0x1894 ...snip... ...Thanks for the report, I'll have look what could still break fackets_out... I think this one is now clear to me, tcp_fragment/collapse adjusts fackets_out (incorrectly) also for reno flow when there were some dupACKs that made sacked_out != 0. Could you please try if patch below proves all them to be of non-SACK origin... In case that's true, it's rather harmless, I'll send a fix on Monday or so (this would anyway be needed)... If you find out that them occur with SACK enabled flow, that would be more interesting and requires more digging... I'm trying now to reproduce this WARNING. It seems that the n/w behaves differently during the week ends. Probably taking a break. C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc8-mm2 - tcp_fastretrans_alert() WARNING
Hello ! Andrew Morton wrote: > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc8/2.6.23-rc8-mm2/ I just found that warning in my logs. It seems that it's been happening since rc7-mm1 at least. Thanks ! C. WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 tcp_fastretrans_alert() Call Trace: [] tcp_ack+0xcd6/0x1894 [] tcp_data_queue+0x5be/0xae7 [] tcp_rcv_established+0x61f/0x6df [] __lock_acquire+0x8a1/0xf1b [] tcp_v4_do_rcv+0x3e/0x394 [] tcp_v4_rcv+0x61c/0x9a9 [] ip_local_deliver+0x1da/0x2a4 [] ip_rcv+0x583/0x5c9 [] packet_rcv_spkt+0x19a/0x1a8 [] netif_receive_skb+0x2cf/0x2f5 [] :tg3:tg3_poll+0x65d/0x8a4 [] net_rx_action+0xb8/0x191 [] __do_softirq+0x5f/0xe0 [] call_softirq+0x1c/0x28 [] do_softirq+0x3b/0xb8 [] irq_exit+0x4e/0x50 [] do_IRQ+0xbd/0xd7 [] mwait_idle+0x0/0x4d [] ret_from_intr+0x0/0xf [] mwait_idle+0x43/0x4d [] enter_idle+0x22/0x24 [] cpu_idle+0x9d/0xc0 [] rest_init+0x55/0x57 [] start_kernel+0x2d6/0x2e2 [] _sinittext+0x134/0x13b - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc8-mm2 - tcp_fastretrans_alert() WARNING
Hello ! Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc8/2.6.23-rc8-mm2/ I just found that warning in my logs. It seems that it's been happening since rc7-mm1 at least. Thanks ! C. WARNING: at /home/legoater/linux/2.6.23-rc8-mm2/net/ipv4/tcp_input.c:2314 tcp_fastretrans_alert() Call Trace: IRQ [8040fdc3] tcp_ack+0xcd6/0x1894 [80411c79] tcp_data_queue+0x5be/0xae7 [80412b54] tcp_rcv_established+0x61f/0x6df [80254146] __lock_acquire+0x8a1/0xf1b [80419cfd] tcp_v4_do_rcv+0x3e/0x394 [8041a66f] tcp_v4_rcv+0x61c/0x9a9 [803ff1e3] ip_local_deliver+0x1da/0x2a4 [803ffb4e] ip_rcv+0x583/0x5c9 [8046d33f] packet_rcv_spkt+0x19a/0x1a8 [803e081c] netif_receive_skb+0x2cf/0x2f5 [88042505] :tg3:tg3_poll+0x65d/0x8a4 [803e09e8] net_rx_action+0xb8/0x191 [8023a927] __do_softirq+0x5f/0xe0 [8020c98c] call_softirq+0x1c/0x28 [8020e9c3] do_softirq+0x3b/0xb8 [8023aa1e] irq_exit+0x4e/0x50 [8020e7df] do_IRQ+0xbd/0xd7 [80209cb9] mwait_idle+0x0/0x4d [8020bce6] ret_from_intr+0x0/0xf EOI [80209cfc] mwait_idle+0x43/0x4d [802099fb] enter_idle+0x22/0x24 [80209c4f] cpu_idle+0x9d/0xc0 [80476a91] rest_init+0x55/0x57 [80630815] start_kernel+0x2d6/0x2e2 [80630134] _sinittext+0x134/0x13b - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] Move the IPC namespace under the option
Pavel Emelyanov wrote: > Currently all the IPC namespace management code is in > ipc/util.c. I moved this code into ipc/namespace.c file > which is compiled out when needed. > > The linux/ipc_namespace.h file is used to store the > prototypes of the functions in namespace.c and the stubs > for NAMESPACES=n case. This is done so, because the stub > for copy_ipc_namespace requires the knowledge of the > CLONE_NEWIPC flag, which is in sched.h. But the linux/ipc.h > file itself is included into many many .c files via the > sys.h->sem.h sequence so adding the sched.h into it will > make all these .c depend on sched.h which is not that good. > On the other hand the knowledge about the namespaces stuff > is required in 4 .c files only. > > Besides, this patch compiles out some auxiliary functions > from ipc/sem.c, msg.c and shm.c files. > > Signied-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> ^ that sounds french :) Also, for the code aesthetics, I think that ipc/ipc_namespace.c is a better name which fits the header file ipc/ipc_namespace.h. I'm wondering if it is possible to move the init routines : #ifdef CONFIG_NAMESPACES int {msg,sem,shm}_init_ns(struct ipc_namespace *ns) { ... } #endif from the ipc/{msg,sem,shm}.c file to the ipc/ipc_namespace.c file. It would look better in the code than using ugly #ifdef. Thanks ! C.
> > --- > > diff --git a/include/linux/ipc.h b/include/linux/ipc.h > index 96988d1..b882610 100644 > --- a/include/linux/ipc.h > +++ b/include/linux/ipc.h > @@ -100,56 +100,6 @@ struct kern_ipc_perm > void*security; > }; > > -struct ipc_ids; > -struct ipc_namespace { > - struct kref kref; > - struct ipc_ids *ids[3]; > - > - int sem_ctls[4]; > - int used_sems; > - > - int msg_ctlmax; > - int msg_ctlmnb; > - int msg_ctlmni; > - > - size_t shm_ctlmax; > - size_t shm_ctlall; > - int shm_ctlmni; > - int shm_tot; > -}; > - > -extern struct ipc_namespace init_ipc_ns; > - > -#ifdef CONFIG_SYSVIPC > -#define INIT_IPC_NS(ns) .ns = _ipc_ns, > -extern void free_ipc_ns(struct kref *kref); > -extern struct ipc_namespace *copy_ipcs(unsigned long flags, > - struct ipc_namespace *ns); > -#else > -#define INIT_IPC_NS(ns) > -static inline struct ipc_namespace *copy_ipcs(unsigned long flags, > - struct ipc_namespace *ns) > -{ > - return ns; > -} > -#endif > - > -static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) > -{ > -#ifdef CONFIG_SYSVIPC > - if (ns) > - kref_get(>kref); > -#endif > - return ns; > -} > - > -static inline void put_ipc_ns(struct ipc_namespace *ns) > -{ > -#ifdef CONFIG_SYSVIPC > - kref_put(>kref, free_ipc_ns); > -#endif > -} > - > #endif /* __KERNEL__ */ > > #endif /* _LINUX_IPC_H */ > diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h > new file mode 100644 > index 000..3d8a516 > --- /dev/null > +++ b/include/linux/ipc_namespace.h > @@ -0,0 +1,67 @@ > +#ifndef __IPC_NAMESPACE_H__ > +#define __IPC_NAMESPACE_H__ > + > +#include > + > +struct ipc_ids; > +struct ipc_namespace { > + struct kref kref; > + struct ipc_ids *ids[3]; > + > + int sem_ctls[4]; > + int used_sems; > + > + int msg_ctlmax; > + int msg_ctlmnb; > + int msg_ctlmni; > + > + size_t shm_ctlmax; > + size_t shm_ctlall; > + int shm_ctlmni; > + int shm_tot; > +}; > + > +extern struct ipc_namespace init_ipc_ns; > + > +#ifdef CONFIG_SYSVIPC > +#define 
INIT_IPC_NS(ns) .ns = _ipc_ns, > +#else > +#define INIT_IPC_NS(ns) > +#endif > + > +#if defined(CONFIG_SYSVIPC) && defined(CONFIG_NAMESPACES) > +extern void free_ipc_ns(struct kref *kref); > +extern struct ipc_namespace *copy_ipcs(unsigned long flags, > + struct ipc_namespace *ns); > + > +static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) > +{ > + if (ns) > + kref_get(>kref); > + return ns; > +} > + > +static inline void put_ipc_ns(struct ipc_namespace *ns) > +{ > + kref_put(>kref, free_ipc_ns); > +} > +#else > +static inline struct ipc_namespace *copy_ipcs(unsigned long flags, > + struct ipc_namespace *ns) > +{ > + if (flags & CLONE_NEWIPC) > + return ERR_PTR(-EINVAL); > + > + return ns; > +} > + > +static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) > +{ > + return ns; > +} > + > +static inline void put_ipc_ns(struct ipc_namespace *ns) > +{ > +} > +#endif > +#endif > diff --git a/ipc/util.c b/ipc/util.c > index fd29246..44fb843 100644 >
Re: [PATCH 2/5] Move the UTS namespace under the option
Pavel Emelyanov wrote: > Currently all the namespace management code is in the > kernel/utsname.c file, so just compile it out and make > stub in .h file. > > The init namespace itself is in init/version.c and is > left in the kernel. > > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks ! C. > > --- > > diff --git a/include/linux/utsname.h b/include/linux/utsname.h > index 923db99..52b9116 100644 > --- a/include/linux/utsname.h > +++ b/include/linux/utsname.h > @@ -35,6 +35,7 @@ struct new_utsname { > #include > #include > #include > +#include > #include > > struct uts_namespace { > @@ -43,6 +44,7 @@ struct uts_namespace { > }; > extern struct uts_namespace init_uts_ns; > > +#ifdef CONFIG_NAMESPACES > static inline void get_uts_ns(struct uts_namespace *ns) > { > kref_get(>kref); > @@ -56,6 +58,25 @@ static inline void put_uts_ns(struct uts > { > kref_put(>kref, free_uts_ns); > } > +#else > +static inline void get_uts_ns(struct uts_namespace *ns) > +{ > +} > + > +static inline void put_uts_ns(struct uts_namespace *ns) > +{ > +} > + > +static inline struct uts_namespace *copy_utsname(unsigned long flags, > + struct uts_namespace *ns) > +{ > + if (flags & CLONE_NEWUTS) > + return ERR_PTR(-EINVAL); > + > + return ns; > +} > +#endif > + > static inline struct new_utsname *utsname(void) > { > return >nsproxy->uts_ns->name; > diff --git a/kernel/Makefile b/kernel/Makefile > index 76f782f..5817bfe 100644 > --- a/kernel/Makefile > +++ b/kernel/Makefile > @@ -4,8 +4,7 @@ > signal.o sys.o kmod.o workqueue.o pid.o \ > rcupdate.o extable.o params.o posix-timers.o \ > kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ > - hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ > - utsname.o notifier.o sysctl.o > + hrtimer.o rwsem.o latency.o nsproxy.o srcu.o notifier.o sysctl.o > > obj-$(CONFIG_SYSCTL) += sysctl_check.o > obj-$(CONFIG_STACKTRACE) += stacktrace.o > @@ -50,6 +49,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o 
> obj-$(CONFIG_AUDIT_TREE) += audit_tree.o > obj-$(CONFIG_KPROBES) += kprobes.o > obj-$(CONFIG_KGDB) += kgdb.o > +obj-$(CONFIG_NAMESPACES) += utsname.o > obj-$(CONFIG_SYSFS) += ksysfs.o > obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o > obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 4/5] Move the user namespace under the option
Pavel Emelyanov wrote: > We currently have a CONFIG_USER_NS option. Just rename it > into CONFIG_NAMESPACES_EXPERIMENTAL and move the init_user_ns > into user.c file to make the kernel compile and work without > the namespaces support. > > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks ! C. > --- > > diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h > index b5f41d4..dda160c 100644 > --- a/include/linux/user_namespace.h > +++ b/include/linux/user_namespace.h > @@ -17,7 +17,7 @@ struct user_namespace { > > extern struct user_namespace init_user_ns; > > -#ifdef CONFIG_USER_NS > +#ifdef CONFIG_NAMESPACES_EXPERIMENTAL > > static inline struct user_namespace *get_user_ns(struct user_namespace *ns) > { > diff --git a/init/Kconfig b/init/Kconfig > index 684ccfb..0db1c3b 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -206,15 +206,6 @@ config TASK_IO_ACCOUNTING > > Say N if unsure. > > -config USER_NS > - bool "User Namespaces (EXPERIMENTAL)" > - default n > - depends on EXPERIMENTAL > - help > - Support user namespaces. This allows containers, i.e. > - vservers, to use user namespaces to provide different > - user info for different servers. If unsure, say N.
> - > config AUDIT > bool "Auditing support" > depends on NET > diff --git a/kernel/Makefile b/kernel/Makefile > index 76f782f..5817bfe 100644 > --- a/kernel/Makefile > +++ b/kernel/Makefile > @@ -4,7 +4,7 @@ > > obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ > exit.o itimer.o time.o softirq.o resource.o \ > - sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ > + sysctl.o capability.o ptrace.o timer.o user.o \ > signal.o sys.o kmod.o workqueue.o pid.o \ > rcupdate.o extable.o params.o posix-timers.o \ > kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ > @@ -50,6 +49,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o > obj-$(CONFIG_KPROBES) += kprobes.o > obj-$(CONFIG_KGDB) += kgdb.o > obj-$(CONFIG_NAMESPACES) += utsname.o > +obj-$(CONFIG_NAMESPACES_EXPERIMENTAL) += user_namespace.o > obj-$(CONFIG_SYSFS) += ksysfs.o > obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o > obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ > diff --git a/kernel/user.c b/kernel/user.c > index b45f55f..d7c0831 100644 > --- a/kernel/user.c > +++ b/kernel/user.c > @@ -17,6 +17,15 @@ > #include > #include > > +struct user_namespace init_user_ns = { > + .kref = { > + .refcount = ATOMIC_INIT(2), > + }, > + .root_user = _user, > +}; > + > +EXPORT_SYMBOL_GPL(init_user_ns); > + > /* > * UID task count cache, to get fast user lookup in "alloc_uid" > * when changing user ID's (ie setuid() and friends). 
> @@ -199,6 +208,7 @@ void switch_uid(struct user_struct *new_ > suid_keys(current); > } > > +#ifdef CONFIG_NAMESPACES_EXPERIMENTAL > void release_uids(struct user_namespace *ns) > { > int i; > @@ -223,6 +233,7 @@ void release_uids(struct user_namespace > > free_uid(ns->root_user); > } > +#endif > > static int __init uid_cache_init(void) > { > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 7af90fc..4c90062 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -10,17 +10,6 @@ > #include > #include > > -struct user_namespace init_user_ns = { > - .kref = { > - .refcount = ATOMIC_INIT(2), > - }, > - .root_user = _user, > -}; > - > -EXPORT_SYMBOL_GPL(init_user_ns); > - > -#ifdef CONFIG_USER_NS > - > /* > * Clone a new ns copying an original user ns, setting refcount to 1 > * @old_ns: namespace to clone > @@ -84,5 +73,3 @@ void free_user_ns(struct kref *kref) > release_uids(ns); > kfree(ns); > } > - > -#endif /* CONFIG_USER_NS */ > > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] The config option itself
Pavel Emelyanov wrote: > The option is called NAMESPACES. It can be selectable only > if EMBEDDED is chosen (this was Eric's requisition). When > the EMBEDDED is off namespaces will be on automatically. > > One more option (NAMESPACES_EXPERIMENTAL) was added by > Serge's request to move there all the namespaces that are > not finished yet. Currently only the user and the network > namespaces are such. > > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> Thanks ! C. > > --- > > diff --git a/init/Kconfig b/init/Kconfig > index 684ccfb..05a71d7 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -369,6 +360,23 @@ config RELAY > > If unsure, say N. > > +config NAMESPACES > + bool "The namespaces support" if EMBEDDED > + default !EMBEDDED > + help > + Provides the way to make tasks work with different objects using > + the same id. For example same IPC id may refer to different objects > + or same user id or pid may refer to different tasks when used in > + different namespaces. > + > +config NAMESPACES_EXPERIMENTAL > + bool "Add the experimantal namespaces support" if EMBEDDED > + depends on NAMESPACES && EXPERIMENTAL > + default !EMBEDDED > + help > + Also include the support for the namespaces that are not fnished > + or well developed yet > + > config BLK_DEV_INITRD > bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" > depends on BROKEN || !FRV > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] The config option itself
Pavel Emelyanov wrote: The option is called NAMESPACES. It can be selectable only if EMBEDDED is chosen (this was Eric's requisition). When the EMBEDDED is off namespaces will be on automatically. One more option (NAMESPACES_EXPERIMENTAL) was added by Serge's request to move there all the namespaces that are not finished yet. Currently only the user and the network namespaces are such. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks ! C. --- diff --git a/init/Kconfig b/init/Kconfig index 684ccfb..05a71d7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -369,6 +360,23 @@ config RELAY If unsure, say N. +config NAMESPACES + bool The namespaces support if EMBEDDED + default !EMBEDDED + help + Provides the way to make tasks work with different objects using + the same id. For example same IPC id may refer to different objects + or same user id or pid may refer to different tasks when used in + different namespaces. + +config NAMESPACES_EXPERIMENTAL + bool Add the experimantal namespaces support if EMBEDDED + depends on NAMESPACES EXPERIMENTAL + default !EMBEDDED + help + Also include the support for the namespaces that are not fnished + or well developed yet + config BLK_DEV_INITRD bool Initial RAM filesystem and RAM disk (initramfs/initrd) support depends on BROKEN || !FRV - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/5] Move the UTS namespace under the option
Pavel Emelyanov wrote: Currently all the namespace management code is in the kernel/utsname.c file, so just compile it out and make stub in .h file. The init namespace itself is in init/version.c and is left in the kernel. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks ! C. --- diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 923db99..52b9116 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -35,6 +35,7 @@ struct new_utsname { #include linux/sched.h #include linux/kref.h #include linux/nsproxy.h +#include linux/err.h #include asm/atomic.h struct uts_namespace { @@ -43,6 +44,7 @@ struct uts_namespace { }; extern struct uts_namespace init_uts_ns; +#ifdef CONFIG_NAMESPACES static inline void get_uts_ns(struct uts_namespace *ns) { kref_get(ns-kref); @@ -56,6 +58,25 @@ static inline void put_uts_ns(struct uts { kref_put(ns-kref, free_uts_ns); } +#else +static inline void get_uts_ns(struct uts_namespace *ns) +{ +} + +static inline void put_uts_ns(struct uts_namespace *ns) +{ +} + +static inline struct uts_namespace *copy_utsname(unsigned long flags, + struct uts_namespace *ns) +{ + if (flags CLONE_NEWUTS) + return ERR_PTR(-EINVAL); + + return ns; +} +#endif + static inline struct new_utsname *utsname(void) { return current-nsproxy-uts_ns-name; diff --git a/kernel/Makefile b/kernel/Makefile index 76f782f..5817bfe 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -4,8 +4,7 @@ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ - utsname.o notifier.o sysctl.o + hrtimer.o rwsem.o latency.o nsproxy.o srcu.o notifier.o sysctl.o obj-$(CONFIG_SYSCTL) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -50,6 +49,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_AUDIT_TREE) += audit_tree.o obj-$(CONFIG_KPROBES) += 
kprobes.o obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_NAMESPACES) += utsname.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 4/5] Move the user namespace under the option
Pavel Emelyanov wrote: We currently have a CONFIG_USER_NS option. Just rename it into CONFIG_NAMESPACES_EXPERIMANTAL and move the init_user_ns into user.c file to make the kernel compile and work without the namespaces support. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Acked-by: Cedric Le Goater [EMAIL PROTECTED] Thanks ! C. --- diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b5f41d4..dda160c 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -17,7 +17,7 @@ struct user_namespace { extern struct user_namespace init_user_ns; -#ifdef CONFIG_USER_NS +#ifdef CONFIG_NAMESPACES_EXPERIMENTAL static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { diff --git a/init/Kconfig b/init/Kconfig index 684ccfb..0db1c3b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -206,15 +206,6 @@ config TASK_IO_ACCOUNTING Say N if unsure. -config USER_NS - bool User Namespaces (EXPERIMENTAL) - default n - depends on EXPERIMENTAL - help - Support user namespaces. This allows containers, i.e. - vservers, to use user namespaces to provide different - user info for different servers. If unsure, say N. 
- config AUDIT bool Auditing support depends on NET diff --git a/kernel/Makefile b/kernel/Makefile index 76f782f..5817bfe 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -4,7 +4,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ exit.o itimer.o time.o softirq.o resource.o \ - sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ + sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ @@ -50,6 +49,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_NAMESPACES) += utsname.o +obj-$(CONFIG_NAMESPACES_EXPERIMENTAL) += user_namespace.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ diff --git a/kernel/user.c b/kernel/user.c index b45f55f..d7c0831 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -17,6 +17,15 @@ #include linux/module.h #include linux/user_namespace.h +struct user_namespace init_user_ns = { + .kref = { + .refcount = ATOMIC_INIT(2), + }, + .root_user = root_user, +}; + +EXPORT_SYMBOL_GPL(init_user_ns); + /* * UID task count cache, to get fast user lookup in alloc_uid * when changing user ID's (ie setuid() and friends). 
@@ -199,6 +208,7 @@ void switch_uid(struct user_struct *new_ suid_keys(current); } +#ifdef CONFIG_NAMESPACES_EXPERIMENTAL void release_uids(struct user_namespace *ns) { int i; @@ -223,6 +233,7 @@ void release_uids(struct user_namespace free_uid(ns-root_user); } +#endif static int __init uid_cache_init(void) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 7af90fc..4c90062 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -10,17 +10,6 @@ #include linux/nsproxy.h #include linux/user_namespace.h -struct user_namespace init_user_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, - .root_user = root_user, -}; - -EXPORT_SYMBOL_GPL(init_user_ns); - -#ifdef CONFIG_USER_NS - /* * Clone a new ns copying an original user ns, setting refcount to 1 * @old_ns: namespace to clone @@ -84,5 +73,3 @@ void free_user_ns(struct kref *kref) release_uids(ns); kfree(ns); } - -#endif /* CONFIG_USER_NS */ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] Move the IPC namespace under the option
Pavel Emelyanov wrote: Currently all the IPC namespace management code is in ipc/util.c. I moved this code into ipc/namespace.c file which is compiled out when needed. The linux/ipc_namespace.h file is used to store the prototypes of the functions in namespace.c and the stubs for NAMESPACES=n case. This is done so, because the stub for copy_ipc_namespace requires the knoweledge of the CLONE_NEWIPC flag, which is in sched.h. But the linux/ipc.h file itself in included into many many .c files via the sys.h-sem.h sequence so adding the sched.h into it will make all these .c depend on sched.h which is not that good. On the other hand the knowledge about the namespaces stuff is required in 4 .c files only. Besides, this patch compiles out some auxiliary functions from ipc/sem.c, msg.c and shm.c files. Signied-off-by: Pavel Emelyanov [EMAIL PROTECTED] ^ that sounds french :) Also, for the code aesthetic, I think that ipc/ipc_namespace.c is a better name which fits the header file ipc/ipc_namespace.h. I'm wondering if it possible to move the init routines : #ifdef CONFIG_NAMESPACES int {msg,sem,shm}_init_ns(struct ipc_namespace *ns) { ... } #endif from the ipc/{msg,sem,shm}.c file to the ipc/ipc_namespace.c file. It would look better in the code than using ugly #ifdef. Thanks ! C. 
--- diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 96988d1..b882610 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -100,56 +100,6 @@ struct kern_ipc_perm void*security; }; -struct ipc_ids; -struct ipc_namespace { - struct kref kref; - struct ipc_ids *ids[3]; - - int sem_ctls[4]; - int used_sems; - - int msg_ctlmax; - int msg_ctlmnb; - int msg_ctlmni; - - size_t shm_ctlmax; - size_t shm_ctlall; - int shm_ctlmni; - int shm_tot; -}; - -extern struct ipc_namespace init_ipc_ns; - -#ifdef CONFIG_SYSVIPC -#define INIT_IPC_NS(ns) .ns = init_ipc_ns, -extern void free_ipc_ns(struct kref *kref); -extern struct ipc_namespace *copy_ipcs(unsigned long flags, - struct ipc_namespace *ns); -#else -#define INIT_IPC_NS(ns) -static inline struct ipc_namespace *copy_ipcs(unsigned long flags, - struct ipc_namespace *ns) -{ - return ns; -} -#endif - -static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) -{ -#ifdef CONFIG_SYSVIPC - if (ns) - kref_get(ns-kref); -#endif - return ns; -} - -static inline void put_ipc_ns(struct ipc_namespace *ns) -{ -#ifdef CONFIG_SYSVIPC - kref_put(ns-kref, free_ipc_ns); -#endif -} - #endif /* __KERNEL__ */ #endif /* _LINUX_IPC_H */ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h new file mode 100644 index 000..3d8a516 --- /dev/null +++ b/include/linux/ipc_namespace.h @@ -0,0 +1,67 @@ +#ifndef __IPC_NAMESPACE_H__ +#define __IPC_NAMESPACE_H__ + +#include linux/err.h + +struct ipc_ids; +struct ipc_namespace { + struct kref kref; + struct ipc_ids *ids[3]; + + int sem_ctls[4]; + int used_sems; + + int msg_ctlmax; + int msg_ctlmnb; + int msg_ctlmni; + + size_t shm_ctlmax; + size_t shm_ctlall; + int shm_ctlmni; + int shm_tot; +}; + +extern struct ipc_namespace init_ipc_ns; + +#ifdef CONFIG_SYSVIPC +#define INIT_IPC_NS(ns) .ns = init_ipc_ns, +#else +#define INIT_IPC_NS(ns) +#endif + +#if defined(CONFIG_SYSVIPC) defined(CONFIG_NAMESPACES) +extern void free_ipc_ns(struct kref *kref); +extern 
struct ipc_namespace *copy_ipcs(unsigned long flags, + struct ipc_namespace *ns); + +static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) +{ + if (ns) + kref_get(ns-kref); + return ns; +} + +static inline void put_ipc_ns(struct ipc_namespace *ns) +{ + kref_put(ns-kref, free_ipc_ns); +} +#else +static inline struct ipc_namespace *copy_ipcs(unsigned long flags, + struct ipc_namespace *ns) +{ + if (flags CLONE_NEWIPC) + return ERR_PTR(-EINVAL); + + return ns; +} + +static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) +{ + return ns; +} + +static inline void put_ipc_ns(struct ipc_namespace *ns) +{ +} +#endif +#endif diff --git a/ipc/util.c b/ipc/util.c index fd29246..44fb843 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -32,6 +32,7 @@ #include linux/proc_fs.h #include linux/audit.h #include linux/nsproxy.h
Re: [PATCH] Use KMEM_CACHE macro to create the nsproxy cache
Pavel Emelyanov wrote: > The blessed way for standard caches is to use it. > Besides, this may give this cache a better alignment. > > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> yes of course. thanks. Acked-by: Cedric Le Goater <[EMAIL PROTECTED]> > --- > > diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c > index ee68964..31351cc 100644 > --- a/kernel/nsproxy.c > +++ b/kernel/nsproxy.c > @@ -222,8 +222,7 @@ void exit_task_namespaces(struct task_st > > static int __init nsproxy_cache_init(void) > { > - nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), > -0, SLAB_PANIC, NULL); > + nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); > return 0; > } > > ___ > Containers mailing list > [EMAIL PROTECTED] > https://lists.linux-foundation.org/mailman/listinfo/containers > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Remove unused member from nsproxy
Pavel Emelyanov wrote: > The nslock spinlock is not used in the kernel at all. it's also useless now that you have put some RCU rules around it. right ? C. > Remove it. > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]> > > --- > > diff --git a/include/linux/init_task.h b/include/linux/init_task.h > index a3f2541..cae35b6 100644 > --- a/include/linux/init_task.h > +++ b/include/linux/init_task.h > @@ -73,7 +73,6 @@ extern struct nsproxy init_nsproxy; > #define INIT_NSPROXY(nsproxy) { > \ > .pid_ns = _pid_ns, \ > .count = ATOMIC_INIT(1), \ > - .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ > .uts_ns = _uts_ns, \ > .mnt_ns = NULL, \ > INIT_NET_NS(net_ns) \ > diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h > index 4d564d8..0e66b57 100644 > --- a/include/linux/nsproxy.h > +++ b/include/linux/nsproxy.h > @@ -23,7 +23,6 @@ struct pid_namespace; > */ > struct nsproxy { > atomic_t count; > - spinlock_t nslock; > struct uts_namespace *uts_ns; > struct ipc_namespace *ipc_ns; > struct mnt_namespace *mnt_ns; > ___ > Containers mailing list > [EMAIL PROTECTED] > https://lists.linux-foundation.org/mailman/listinfo/containers > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Use KMEM_CACHE macro to create the nsproxy cache
Pavel Emelyanov wrote: The blessed way for standard caches is to use it. Besides, this may give this cache a better alignment. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] yes of course. thanks. Acked-by: Cedric Le Goater [EMAIL PROTECTED] --- diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index ee68964..31351cc 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -222,8 +222,7 @@ void exit_task_namespaces(struct task_st static int __init nsproxy_cache_init(void) { - nsproxy_cachep = kmem_cache_create(nsproxy, sizeof(struct nsproxy), -0, SLAB_PANIC, NULL); + nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); return 0; } ___ Containers mailing list [EMAIL PROTECTED] https://lists.linux-foundation.org/mailman/listinfo/containers - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1
putting Vlad in Cc: Cedric Le Goater wrote: > Andrew Morton wrote: >> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc7/2.6.23-rc7-mm1/ > > I also get this compile error on s390. 'linux/scatterlist.h' has disappeared from the #include pile but where ? > > /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c: In function > `sctp_auth_calculate_hmac': > /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: error: storage size of > 'sg' isn't known > /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: warning: unused variable > `sg' The following patch works of course but it seems too simplistic for s390. Cheers, C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- net/sctp/auth.c |1 + 1 file changed, 1 insertion(+) Index: 2.6.23-rc7-mm1/net/sctp/auth.c === --- 2.6.23-rc7-mm1.orig/net/sctp/auth.c +++ 2.6.23-rc7-mm1/net/sctp/auth.c @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/crypto.h> +#include <linux/scatterlist.h> #include <net/sctp/sctp.h> #include <net/sctp/auth.h> - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1
Andrew Morton wrote: > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc7/2.6.23-rc7-mm1/ I also get this compile error on s390. 'linux/scatterlist.h' has disappeared from the #include pile but where ? /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c: In function `sctp_auth_calculate_hmac': /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: error: storage size of 'sg' isn't known /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: warning: unused variable `sg' Cheers, C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1 -- s390 compile failures
Andy Whitcroft wrote: > Getting compile errors on S390: > > CC arch/s390/mm/cmm.o > arch/s390/mm/cmm.c: In function `cmm_init': > arch/s390/mm/cmm.c:431: error: implicit declaration of function > `register_oom_notifier' > arch/s390/mm/cmm.c:443: error: implicit declaration of function > `unregister_oom_notifier' > make[1]: *** [arch/s390/mm/cmm.o] Error 1 > make: *** [arch/s390/mm] Error 2 yes. It's from oom-move-prototypes-to-appropriate-header-file.patch. I think this patch fixes it. C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- arch/s390/mm/cmm.c |1 + 1 file changed, 1 insertion(+) Index: 2.6.23-rc7-mm1/arch/s390/mm/cmm.c === --- 2.6.23-rc7-mm1.orig/arch/s390/mm/cmm.c +++ 2.6.23-rc7-mm1/arch/s390/mm/cmm.c @@ -17,6 +17,7 @@ #include <linux/ctype.h> #include <linux/swap.h> #include <linux/kthread.h> +#include <linux/oom.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1
Andrew Morton wrote: > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc7/2.6.23-rc7-mm1/ /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c: In function `dasd_eckd_build_cp': /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1181: error: syntax error before "struct" /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: `iter' undeclared (first use in this function) /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: (Each undeclared identifier is reported only once /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: for each function it appears in.) /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: `bv' undeclared (first use in this function) /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: statement with no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: statement with no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: statement with no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: statement with no effect make[3]: *** [drivers/s390/block/dasd_eckd.o] Error 1 make[2]: *** [drivers/s390/block] Error 2 Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- drivers/s390/block/dasd_eckd.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: 
2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c === --- 2.6.23-rc7-mm1.orig/drivers/s390/block/dasd_eckd.c +++ 2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c @@ -1176,7 +1176,7 @@ dasd_eckd_build_cp(struct dasd_device * struct LO_eckd_data *LO_data; struct dasd_ccw_req *cqr; struct ccw1 *ccw; - struct req_iterator iter + struct req_iterator iter; struct bio_vec *bv; char *dst; unsigned int blksize, blk_per_trk, off; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1
Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc7/2.6.23-rc7-mm1/ /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c: In function `dasd_eckd_build_cp': /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1181: error: syntax error before struct /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: `iter' undeclared (first use in this function) /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: (Each undeclared identifier is reported only once /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: for each function it appears in.) /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: error: `bv' undeclared (first use in this function) /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: left-hand operand of comma expression has no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: statement with no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1209: warning: statement with no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: statement with no effect /home/clg/linux/2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c:1257: warning: statement with no effect make[3]: *** [drivers/s390/block/dasd_eckd.o] Error 1 make[2]: *** [drivers/s390/block] Error 2 Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- drivers/s390/block/dasd_eckd.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: 2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c === 
--- 2.6.23-rc7-mm1.orig/drivers/s390/block/dasd_eckd.c +++ 2.6.23-rc7-mm1/drivers/s390/block/dasd_eckd.c @@ -1176,7 +1176,7 @@ dasd_eckd_build_cp(struct dasd_device * struct LO_eckd_data *LO_data; struct dasd_ccw_req *cqr; struct ccw1 *ccw; - struct req_iterator iter + struct req_iterator iter; struct bio_vec *bv; char *dst; unsigned int blksize, blk_per_trk, off; - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1 -- s390 compile failures
Andy Whitcroft wrote: Getting compile errors on S390: CC arch/s390/mm/cmm.o arch/s390/mm/cmm.c: In function `cmm_init': arch/s390/mm/cmm.c:431: error: implicit declaration of function `register_oom_notifier' arch/s390/mm/cmm.c:443: error: implicit declaration of function `unregister_oom_notifier' make[1]: *** [arch/s390/mm/cmm.o] Error 1 make: *** [arch/s390/mm] Error 2 yes. It's from oom-move-prototypes-to-appropriate-header-file.patch. I think this patch fixes it. C. Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- arch/s390/mm/cmm.c |1 + 1 file changed, 1 insertion(+) Index: 2.6.23-rc7-mm1/arch/s390/mm/cmm.c === --- 2.6.23-rc7-mm1.orig/arch/s390/mm/cmm.c +++ 2.6.23-rc7-mm1/arch/s390/mm/cmm.c @@ -17,6 +17,7 @@ #include linux/ctype.h #include linux/swap.h #include linux/kthread.h +#include linux/oom.h #include asm/pgalloc.h #include asm/uaccess.h - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1
Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc7/2.6.23-rc7-mm1/ I also get this compile error on s390. 'linux/scatterlist.h' has disappeared from the #include pile but where ? /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c: In function `sctp_auth_calculate_hmac': /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: error: storage size of 'sg' isn't known /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: warning: unused variable `sg' Cheers, C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc7-mm1
putting Vlad in Cc: Cedric Le Goater wrote: Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc7/2.6.23-rc7-mm1/ I also get this compile error on s390. 'linux/scatterlist.h' has disappeared from the #include pile but where ? /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c: In function `sctp_auth_calculate_hmac': /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: error: storage size of 'sg' isn't known /home/clg/linux/2.6.23-rc7-mm1/net/sctp/auth.c:695: warning: unused variable `sg' The following patch works of course but it seems too simplistic for s390. Cheers, C. Signed-off-by: Cedric Le Goater [EMAIL PROTECTED] --- net/sctp/auth.c |1 + 1 file changed, 1 insertion(+) Index: 2.6.23-rc7-mm1/net/sctp/auth.c === --- 2.6.23-rc7-mm1.orig/net/sctp/auth.c +++ 2.6.23-rc7-mm1/net/sctp/auth.c @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/crypto.h> +#include <linux/scatterlist.h> #include <net/sctp/sctp.h> #include <net/sctp/auth.h> - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/3] user.c: #ifdef ->mq_bytes
Hello Alexey ! Alexey Dobriyan wrote: > for those who deselect POSIX message queues. > > Reduces SLAB size of user_struct from 64 to 32 bytes here, > SLUB size -- from 40 bytes to 32 bytes. > > Signed-off-by: Alexey Dobriyan <[EMAIL PROTECTED]> > --- > > include/linux/sched.h |2 ++ > kernel/user.c |2 -- > 2 files changed, 2 insertions(+), 2 deletions(-) > > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -583,8 +583,10 @@ struct user_struct { > atomic_t inotify_watches; /* How many inotify watches does this user > have? */ > atomic_t inotify_devs; /* How many inotify devs does this user have > opened? */ > #endif > +#ifdef CONFIG_POSIX_MQUEUE > /* protected by mq_lock */ > unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ > +#endif > unsigned long locked_shm; /* How many pages of mlocked shm ? */ while you are at it, it seems possible to #ifdef locked_shm also. it's a bit more complex because the code is in mm/ and there are some links with the hugetlbfs also. Cheers, C. > #ifdef CONFIG_KEYS > --- a/kernel/user.c > +++ b/kernel/user.c > @@ -44,7 +44,6 @@ struct user_struct root_user = { > .processes = ATOMIC_INIT(1), > .files = ATOMIC_INIT(0), > .sigpending = ATOMIC_INIT(0), > - .mq_bytes = 0, > .locked_shm = 0, > #ifdef CONFIG_KEYS > .uid_keyring= &root_user_keyring, > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/3] user.c: #ifdef ->mq_bytes
Hello Alexey ! Alexey Dobriyan wrote: for those who deselect POSIX message queues. Reduces SLAB size of user_struct from 64 to 32 bytes here, SLUB size -- from 40 bytes to 32 bytes. Signed-off-by: Alexey Dobriyan <[EMAIL PROTECTED]> --- include/linux/sched.h |2 ++ kernel/user.c |2 -- 2 files changed, 2 insertions(+), 2 deletions(-) --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -583,8 +583,10 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif +#ifdef CONFIG_POSIX_MQUEUE /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ +#endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ while you are at it, it seems possible to #ifdef locked_shm also. it's a bit more complex because the code is in mm/ and there are some links with the hugetlbfs also. Cheers, C. #ifdef CONFIG_KEYS --- a/kernel/user.c +++ b/kernel/user.c @@ -44,7 +44,6 @@ struct user_struct root_user = { .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), - .mq_bytes = 0, .locked_shm = 0, #ifdef CONFIG_KEYS .uid_keyring= &root_user_keyring, - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc6-mm1 - make access to tasks nsproxy ligther (fix)
Cedric Le Goater wrote: > Pavel Emelyanov wrote: >> Looks sane :) >> >> [snip] >> >>> Index: 2.6.23-rc6-mm1/kernel/exit.c >>> === >>> --- 2.6.23-rc6-mm1.orig/kernel/exit.c >>> +++ 2.6.23-rc6-mm1/kernel/exit.c >>> @@ -408,6 +408,8 @@ void daemonize(const char *name, ...) >>> current->fs = fs; >>> atomic_inc(&fs->count); >>> >>> + if (current->nsproxy != init_task.nsproxy) >>> + get_nsproxy(init_task.nsproxy); >>> switch_task_namespaces(current, init_task.nsproxy); >> shouldn't we make the switch under this if() as well? > > right. we can probably simplify switch_task_namespaces() and remove : > > if (ns == new) > return; > > I'll cook a better one today. So I removed this test in * daemonize() bc it is already done * sys_unshare() bc the nsproxy is always new one * exit_task_namespaces() bc it is called with NULL and the task will die right after that. C. make-access-to-tasks-nsproxy-lighter.patch breaks unshare(). When called from unshare(), switch_task_namespaces() takes an extra refcount on the nsproxy, leading to a memory leak of nsproxy objects. Now the problem is that we still need that extra ref when called from daemonize(). Here's an ugly fix for it. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> Cc: Serge E. Hallyn <[EMAIL PROTECTED]> Cc: Pavel Emelyanov <[EMAIL PROTECTED]> Cc: Eric W. Biederman <[EMAIL PROTECTED]> Cc: Oleg Nesterov <[EMAIL PROTECTED]> Cc: Paul E. McKenney <[EMAIL PROTECTED]> --- include/linux/nsproxy.h |5 + kernel/exit.c |5 - kernel/nsproxy.c|9 - 3 files changed, 9 insertions(+), 10 deletions(-) Index: 2.6.23-rc6-mm1/kernel/nsproxy.c === --- 2.6.23-rc6-mm1.orig/kernel/nsproxy.c +++ 2.6.23-rc6-mm1/kernel/nsproxy.c @@ -25,11 +25,6 @@ static struct kmem_cache *nsproxy_cachep struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); -static inline void get_nsproxy(struct nsproxy *ns) -{ - atomic_inc(&ns->count); -} - /* * creates a copy of "orig" with refcount 1.
*/ @@ -205,11 +200,7 @@ void switch_task_namespaces(struct task_ might_sleep(); ns = p->nsproxy; - if (ns == new) - return; - if (new) - get_nsproxy(new); rcu_assign_pointer(p->nsproxy, new); if (ns && atomic_dec_and_test(&ns->count)) { Index: 2.6.23-rc6-mm1/kernel/exit.c === --- 2.6.23-rc6-mm1.orig/kernel/exit.c +++ 2.6.23-rc6-mm1/kernel/exit.c @@ -408,7 +408,10 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - switch_task_namespaces(current, init_task.nsproxy); + if (current->nsproxy != init_task.nsproxy) { + get_nsproxy(init_task.nsproxy); + switch_task_namespaces(current, init_task.nsproxy); + } exit_files(current); current->files = init_task.files; Index: 2.6.23-rc6-mm1/include/linux/nsproxy.h === --- 2.6.23-rc6-mm1.orig/include/linux/nsproxy.h +++ 2.6.23-rc6-mm1/include/linux/nsproxy.h @@ -77,6 +77,11 @@ static inline void put_nsproxy(struct ns } } +static inline void get_nsproxy(struct nsproxy *ns) +{ + atomic_inc(&ns->count); +} + #ifdef CONFIG_CONTAINER_NS int ns_container_clone(struct task_struct *tsk); #else - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc6-mm1 - make access to tasks nsproxy ligther (fix)
Pavel Emelyanov wrote: > Looks sane :) > > [snip] > >> Index: 2.6.23-rc6-mm1/kernel/exit.c >> === >> --- 2.6.23-rc6-mm1.orig/kernel/exit.c >> +++ 2.6.23-rc6-mm1/kernel/exit.c >> @@ -408,6 +408,8 @@ void daemonize(const char *name, ...) >> current->fs = fs; >> atomic_inc(&fs->count); >> >> +if (current->nsproxy != init_task.nsproxy) >> +get_nsproxy(init_task.nsproxy); >> switch_task_namespaces(current, init_task.nsproxy); > > shouldn't we make the switch under this if() as well? right. we can probably simplify switch_task_namespaces() and remove : if (ns == new) return; I'll cook a better one today. Thanks ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc6-mm1 - make access to tasks nsproxy ligther (fix)
Hello ! Andrew Morton wrote: > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc6/2.6.23-rc6-mm1/ make-access-to-tasks-nsproxy-lighter.patch breaks unshare(). When called from unshare(), switch_task_namespaces() takes an extra refcount on the nsproxy, leading to a memory leak of nsproxy objects. Now the problem is that we still need that extra ref when called from daemonize(). Here's an ugly fix for it. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- include/linux/nsproxy.h |5 + kernel/exit.c |2 ++ kernel/nsproxy.c|7 --- 3 files changed, 7 insertions(+), 7 deletions(-) Index: 2.6.23-rc6-mm1/kernel/nsproxy.c === --- 2.6.23-rc6-mm1.orig/kernel/nsproxy.c +++ 2.6.23-rc6-mm1/kernel/nsproxy.c @@ -25,11 +25,6 @@ static struct kmem_cache *nsproxy_cachep struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); -static inline void get_nsproxy(struct nsproxy *ns) -{ - atomic_inc(&ns->count); -} - /* * creates a copy of "orig" with refcount 1. */ @@ -208,8 +203,6 @@ void switch_task_namespaces(struct task_ if (ns == new) return; - if (new) - get_nsproxy(new); rcu_assign_pointer(p->nsproxy, new); if (ns && atomic_dec_and_test(&ns->count)) { Index: 2.6.23-rc6-mm1/kernel/exit.c === --- 2.6.23-rc6-mm1.orig/kernel/exit.c +++ 2.6.23-rc6-mm1/kernel/exit.c @@ -408,6 +408,8 @@ void daemonize(const char *name, ...)
current->fs = fs; atomic_inc(&fs->count); + if (current->nsproxy != init_task.nsproxy) + get_nsproxy(init_task.nsproxy); switch_task_namespaces(current, init_task.nsproxy); exit_files(current); Index: 2.6.23-rc6-mm1/include/linux/nsproxy.h === --- 2.6.23-rc6-mm1.orig/include/linux/nsproxy.h +++ 2.6.23-rc6-mm1/include/linux/nsproxy.h @@ -77,6 +77,11 @@ static inline void put_nsproxy(struct ns } } +static inline void get_nsproxy(struct nsproxy *ns) +{ + atomic_inc(&ns->count); +} + #ifdef CONFIG_CONTAINER_NS int ns_container_clone(struct task_struct *tsk); #else - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc6-mm1 - make access to tasks nsproxy ligther (fix)
Hello ! Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23-rc6/2.6.23-rc6-mm1/ make-access-to-tasks-nsproxy-lighter.patch breaks unshare(). When called from unshare(), switch_task_namespaces() takes an extra refcount on the nsproxy, leading to a memory leak of nsproxy objects. Now the problem is that we still need that extra ref when called from daemonize(). Here's an ugly fix for it. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> --- include/linux/nsproxy.h |5 + kernel/exit.c |2 ++ kernel/nsproxy.c|7 --- 3 files changed, 7 insertions(+), 7 deletions(-) Index: 2.6.23-rc6-mm1/kernel/nsproxy.c === --- 2.6.23-rc6-mm1.orig/kernel/nsproxy.c +++ 2.6.23-rc6-mm1/kernel/nsproxy.c @@ -25,11 +25,6 @@ static struct kmem_cache *nsproxy_cachep struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); -static inline void get_nsproxy(struct nsproxy *ns) -{ - atomic_inc(&ns->count); -} - /* * creates a copy of "orig" with refcount 1. */ @@ -208,8 +203,6 @@ void switch_task_namespaces(struct task_ if (ns == new) return; - if (new) - get_nsproxy(new); rcu_assign_pointer(p->nsproxy, new); if (ns && atomic_dec_and_test(&ns->count)) { Index: 2.6.23-rc6-mm1/kernel/exit.c === --- 2.6.23-rc6-mm1.orig/kernel/exit.c +++ 2.6.23-rc6-mm1/kernel/exit.c @@ -408,6 +408,8 @@ void daemonize(const char *name, ...)
current->fs = fs; atomic_inc(&fs->count); + if (current->nsproxy != init_task.nsproxy) + get_nsproxy(init_task.nsproxy); switch_task_namespaces(current, init_task.nsproxy); exit_files(current); Index: 2.6.23-rc6-mm1/include/linux/nsproxy.h === --- 2.6.23-rc6-mm1.orig/include/linux/nsproxy.h +++ 2.6.23-rc6-mm1/include/linux/nsproxy.h @@ -77,6 +77,11 @@ static inline void put_nsproxy(struct ns } } +static inline void get_nsproxy(struct nsproxy *ns) +{ + atomic_inc(&ns->count); +} + #ifdef CONFIG_CONTAINER_NS int ns_container_clone(struct task_struct *tsk); #else - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc6-mm1 - make access to tasks nsproxy ligther (fix)
Pavel Emelyanov wrote: Looks sane :) [snip] Index: 2.6.23-rc6-mm1/kernel/exit.c === --- 2.6.23-rc6-mm1.orig/kernel/exit.c +++ 2.6.23-rc6-mm1/kernel/exit.c @@ -408,6 +408,8 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); +if (current->nsproxy != init_task.nsproxy) +get_nsproxy(init_task.nsproxy); switch_task_namespaces(current, init_task.nsproxy); shouldn't we make the switch under this if() as well? right. we can probably simplify switch_task_namespaces() and remove : if (ns == new) return; I'll cook a better one today. Thanks ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc6-mm1 - make access to tasks nsproxy ligther (fix)
Cedric Le Goater wrote: Pavel Emelyanov wrote: Looks sane :) [snip] Index: 2.6.23-rc6-mm1/kernel/exit.c === --- 2.6.23-rc6-mm1.orig/kernel/exit.c +++ 2.6.23-rc6-mm1/kernel/exit.c @@ -408,6 +408,8 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); + if (current->nsproxy != init_task.nsproxy) + get_nsproxy(init_task.nsproxy); switch_task_namespaces(current, init_task.nsproxy); shouldn't we make the switch under this if() as well? right. we can probably simplify switch_task_namespaces() and remove : if (ns == new) return; I'll cook a better one today. So I removed this test in * daemonize() bc it is already done * sys_unshare() bc the nsproxy is always new one * exit_task_namespaces() bc it is called with NULL and the task will die right after that. C. make-access-to-tasks-nsproxy-lighter.patch breaks unshare(). When called from unshare(), switch_task_namespaces() takes an extra refcount on the nsproxy, leading to a memory leak of nsproxy objects. Now the problem is that we still need that extra ref when called from daemonize(). Here's an ugly fix for it. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> Cc: Serge E. Hallyn <[EMAIL PROTECTED]> Cc: Pavel Emelyanov <[EMAIL PROTECTED]> Cc: Eric W. Biederman <[EMAIL PROTECTED]> Cc: Oleg Nesterov <[EMAIL PROTECTED]> Cc: Paul E. McKenney <[EMAIL PROTECTED]> --- include/linux/nsproxy.h |5 + kernel/exit.c |5 - kernel/nsproxy.c|9 - 3 files changed, 9 insertions(+), 10 deletions(-) Index: 2.6.23-rc6-mm1/kernel/nsproxy.c === --- 2.6.23-rc6-mm1.orig/kernel/nsproxy.c +++ 2.6.23-rc6-mm1/kernel/nsproxy.c @@ -25,11 +25,6 @@ static struct kmem_cache *nsproxy_cachep struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); -static inline void get_nsproxy(struct nsproxy *ns) -{ - atomic_inc(&ns->count); -} - /* * creates a copy of "orig" with refcount 1.
*/ @@ -205,11 +200,7 @@ void switch_task_namespaces(struct task_ might_sleep(); ns = p->nsproxy; - if (ns == new) - return; - if (new) - get_nsproxy(new); rcu_assign_pointer(p->nsproxy, new); if (ns && atomic_dec_and_test(&ns->count)) { Index: 2.6.23-rc6-mm1/kernel/exit.c === --- 2.6.23-rc6-mm1.orig/kernel/exit.c +++ 2.6.23-rc6-mm1/kernel/exit.c @@ -408,7 +408,10 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - switch_task_namespaces(current, init_task.nsproxy); + if (current->nsproxy != init_task.nsproxy) { + get_nsproxy(init_task.nsproxy); + switch_task_namespaces(current, init_task.nsproxy); + } exit_files(current); current->files = init_task.files; Index: 2.6.23-rc6-mm1/include/linux/nsproxy.h === --- 2.6.23-rc6-mm1.orig/include/linux/nsproxy.h +++ 2.6.23-rc6-mm1/include/linux/nsproxy.h @@ -77,6 +77,11 @@ static inline void put_nsproxy(struct ns } } +static inline void get_nsproxy(struct nsproxy *ns) +{ + atomic_inc(&ns->count); +} + #ifdef CONFIG_CONTAINER_NS int ns_container_clone(struct task_struct *tsk); #else - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Hookup group-scheduler with task container infrastructure
Paul Menage wrote: > On 9/10/07, Dmitry Adamushko <[EMAIL PROTECTED]> wrote: >> On 10/09/2007, Srivatsa Vaddagiri <[EMAIL PROTECTED]> wrote: >>> On Mon, Sep 10, 2007 at 10:22:59AM -0700, Andrew Morton wrote: objection ;) "cpuctlr" isn't memorable. Kernel code is write-rarely, read-often. "cpu_controller", please. The extra typing is worth it ;) >>> Ok! Here's the modified patch (against 2.6.23-rc4-mm1). >> as everyone seems to be in a quest for a better name... I think, the >> obvious one would be just 'group_sched'. >> > > But "sched" on its own could refer to CPU scheduling, I/O scheduling, > network scheduling, ... > > And "group" is more or less implied by the fact that it's in the > containers/control groups filesystem. "control groups" is the name of your framework. right ? > So "group_sched" isn't really all that informative. The name should > definitely contain either "cpu" or "cfs". "cfs" control group subsystem. "cfs" looks good enough to identify the subsystem. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Hookup group-scheduler with task container infrastructure
Paul Menage wrote: On 9/10/07, Dmitry Adamushko <[EMAIL PROTECTED]> wrote: On 10/09/2007, Srivatsa Vaddagiri <[EMAIL PROTECTED]> wrote: On Mon, Sep 10, 2007 at 10:22:59AM -0700, Andrew Morton wrote: objection ;) "cpuctlr" isn't memorable. Kernel code is write-rarely, read-often. "cpu_controller", please. The extra typing is worth it ;) Ok! Here's the modified patch (against 2.6.23-rc4-mm1). as everyone seems to be in a quest for a better name... I think, the obvious one would be just 'group_sched'. But "sched" on its own could refer to CPU scheduling, I/O scheduling, network scheduling, ... And "group" is more or less implied by the fact that it's in the containers/control groups filesystem. "control groups" is the name of your framework. right ? So "group_sched" isn't really all that informative. The name should definitely contain either "cpu" or "cfs". "cfs" control group subsystem. "cfs" looks good enough to identify the subsystem. C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH -mm] ipc namespace: remove config ipc ns fix
Finish the work : kill all #ifdef CONFIG_IPC_NS. Thanks Robert ! C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> Cc: Andrew Morton <[EMAIL PROTECTED]> Cc: Eric Biederman <[EMAIL PROTECTED]> Cc: Robert P. J. Day <[EMAIL PROTECTED]> --- ipc/ipc_sysctl.c |4 1 file changed, 4 deletions(-) Index: 2.6.23-rc4-mm1/ipc/ipc_sysctl.c === --- 2.6.23-rc4-mm1.orig/ipc/ipc_sysctl.c +++ 2.6.23-rc4-mm1/ipc/ipc_sysctl.c @@ -15,7 +15,6 @@ #include <linux/sysctl.h> #include <linux/uaccess.h> -#ifdef CONFIG_IPC_NS static void *get_ipc(ctl_table *table) { char *which = table->data; @@ -23,9 +22,6 @@ static void *get_ipc(ctl_table *table) which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; return which; } -#else -#define get_ipc(T) ((T)->data) -#endif #ifdef CONFIG_PROC_FS static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH -mm] ipc namespace: remove config ipc ns fix
Finish the work : kill all #ifdef CONFIG_IPC_NS. Thanks Robert ! C. Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]> Cc: Andrew Morton <[EMAIL PROTECTED]> Cc: Eric Biederman <[EMAIL PROTECTED]> Cc: Robert P. J. Day <[EMAIL PROTECTED]> --- ipc/ipc_sysctl.c |4 1 file changed, 4 deletions(-) Index: 2.6.23-rc4-mm1/ipc/ipc_sysctl.c === --- 2.6.23-rc4-mm1.orig/ipc/ipc_sysctl.c +++ 2.6.23-rc4-mm1/ipc/ipc_sysctl.c @@ -15,7 +15,6 @@ #include <linux/sysctl.h> #include <linux/uaccess.h> -#ifdef CONFIG_IPC_NS static void *get_ipc(ctl_table *table) { char *which = table->data; @@ -23,9 +22,6 @@ static void *get_ipc(ctl_table *table) which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; return which; } -#else -#define get_ipc(T) ((T)->data) -#endif #ifdef CONFIG_PROC_FS static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] userns: don't leak root user
Alexey Dobriyan wrote: > Signed-off-by: Alexey Dobriyan <[EMAIL PROTECTED]> > --- > > kernel/user_namespace.c |1 + > 1 file changed, 1 insertion(+) > > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -81,6 +81,7 @@ void free_user_ns(struct kref *kref) > struct user_namespace *ns; > > ns = container_of(kref, struct user_namespace, kref); > + free_uid(ns->root_user); > kfree(ns); > } Indeed ... Thanks ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] userns: don't leak root user
Alexey Dobriyan wrote: Signed-off-by: Alexey Dobriyan <[EMAIL PROTECTED]> --- kernel/user_namespace.c |1 + 1 file changed, 1 insertion(+) --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -81,6 +81,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); + free_uid(ns->root_user); kfree(ns); } Indeed ... Thanks ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc1-mm1
Mel Gorman wrote: > On (25/07/07 19:26), Len Brown didst pronounce: >> On Wednesday 25 July 2007 14:58, Andrew Morton wrote: >>> On Wed, 25 Jul 2007 13:23:04 -0400 >>> Len Brown <[EMAIL PROTECTED]> wrote: >>> Andrew, you want to re-pull the acpi tree, or do you want me to send you some patches on top of the current mm? >>> I'd appreciate a fix for this one, please - I'll drop it int he hot-fixes >>> directory as quite a few people seem to be hitting this. >> Maybe simpler for mm1 to go backwards in time rather than forwards. >> This should fix the problem at hand. >> > > I see this made it to the hot-fix directory. It fixes the problem for me > on a standalone x86 laptop. At least the kernel built and booted. I just booted a x86_64 blade with it. Thanks ! C. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-rc1-mm1
Mel Gorman wrote: On (25/07/07 19:26), Len Brown didst pronounce: On Wednesday 25 July 2007 14:58, Andrew Morton wrote: On Wed, 25 Jul 2007 13:23:04 -0400 Len Brown [EMAIL PROTECTED] wrote: Andrew, you want to re-pull the acpi tree, or do you want me to send you some patches on top of the current mm? I'd appreciate a fix for this one, please - I'll drop it int he hot-fixes directory as quite a few people seem to be hitting this. Maybe simpler for mm1 to go backwards in time rather than forwards. This should fix the problem at hand. I see this made it to the hot-fix directory. It fixes the problem for me on a standalone x86 laptop. At least the kernel built and booted. I just booted a x86_64 blade with it. Thanks ! C. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/