[Patch] tmpfs fixes against 2.4.6-pre(2)

2001-07-03 Thread Christoph Rohland

Hi Linus,

This is the second part of my patches.

Writing out of a mapping of a tmpfs file into the same file can
deadlock. This fix has been running in the -ac series for some while.

Please apply
Christoph

diff -uNr 6-pre8-fix1/include/linux/shmem_fs.h 6-pre8-fix2/include/linux/shmem_fs.h
--- 6-pre8-fix1/include/linux/shmem_fs.hSun Apr 29 20:33:00 2001
+++ 6-pre8-fix2/include/linux/shmem_fs.hTue Jul  3 09:28:13 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 6-pre8-fix1/mm/shmem.c 6-pre8-fix2/mm/shmem.c
--- 6-pre8-fix1/mm/shmem.c  Tue Jul  3 08:55:20 2001
+++ 6-pre8-fix2/mm/shmem.c  Tue Jul  3 10:09:26 2001
@@ -162,6 +162,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = >u.shmem_i;
 
+   down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (>lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -205,6 +206,7 @@
info->swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (>lock);
+   up(>sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -289,15 +291,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode->i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = >u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -402,6 +401,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode->i_mapping;
int error;
 
@@ -416,27 +416,28 @@
page_cache_release(*ptr);
}
 
-   down (>i_sem);
-   /* retest we may have slept */
+   info = >u.shmem_i;
+   down (>sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(>u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (>i_sem);
+   up (>sem);
return 0;
 failed:
-   up (>i_sem);
+   up (>sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (>i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -509,6 +510,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (>u.shmem_sb.stat_lock);
if (!sb->u.shmem_sb.free_inodes) {
@@ -528,7 +530,9 @@
inode->i_rdev = NODEV;
inode->i_mapping->a_ops = _aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-   spin_lock_init (>u.shmem_i.lock);
+   info = >u.shmem_i;
+   spin_lock_init (>lock);
+   sema_init (>sem, 1);
switch (mode & S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -558,6 +562,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
struct inode*inode = file->f_dentry->d_inode; 
+   struct shmem_inode_info *info;
unsigned long   limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
loff_t  pos;
struct page *page;
@@ -633,7 +638,11 @@
__get_user(dummy, buf+bytes-1);
}
 
-   page = shmem_getpage_locked(inode, index);
+   info = >u.shmem_i;
+   down (>sem);
+   page = shmem_getpage_locked(info, inode, index);
+   up (>sem);
+
status = PTR_ERR(page);
if (IS_ERR(page))
break;
@@ -644,7 +653,6 @@
}
 
kaddr = kmap(page);
-// can this do a truncated write? cr
status = 

[Patch] tmpfs fixes against 2.4.6-pre

2001-07-03 Thread Christoph Rohland

Hi Linus,

I split up my previous patch into two. Hopefully this is more
acceptable for you or will trigger some comments.

This is the first part:

1) shmem_remount_fs garbles parameters which are not supplied
2) shmem_truncate should check the maximum size else we get ugly
   oopses
3) shmem_file_setup should give an error if the size is too big. So
   the application will fail early. I also cleaned up the error
   handling a bit. 
4) We should recalculate the inode on page allocation. Else we get
   really weird sizes on sparse files.

Please apply
Christoph

diff -uNr 6-pre8/mm/shmem.c 6-pre8-fix1/mm/shmem.c
--- 6-pre8/mm/shmem.c   Tue Jun 12 09:49:28 2001
+++ 6-pre8-fix1/mm/shmem.c  Tue Jul  3 08:55:20 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -33,7 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
-#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
@@ -193,7 +194,14 @@
}
 
 out:
-   info->max_index = index;
+   /*
+* We have no chance to give an error, so we limit it to max
+* size here and the application will fail later
+*/
+   if (index > SHMEM_MAX_BLOCKS) 
+   info->max_index = SHMEM_MAX_BLOCKS;
+   else
+   info->max_index = index;
info->swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (>lock);
@@ -314,6 +322,7 @@
return page;
}

+   shmem_recalc_inode(inode);
if (entry->val) {
unsigned long flags;
 
@@ -1027,6 +1036,8 @@
unsigned long max_inodes, inodes;
struct shmem_sb_info *info = >u.shmem_sb;
 
+   max_blocks = info->max_blocks;
+   max_inodes = info->max_inodes;
if (shmem_parse_options (data, NULL, _blocks, _inodes))
return -EINVAL;
 
@@ -1074,7 +1085,7 @@
sb->u.shmem_sb.free_blocks = blocks;
sb->u.shmem_sb.max_inodes = inodes;
sb->u.shmem_sb.free_inodes = inodes;
-   sb->s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + 
(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)) << PAGE_CACHE_SHIFT;
+   sb->s_maxbytes = (unsigned long long)SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = TMPFS_MAGIC;
@@ -1282,9 +1293,11 @@
struct qstr this;
int vm_enough_memory(long pages);
 
-   error = -ENOMEM;
+   if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT)
+   return ERR_PTR(-EINVAL);
+
if (!vm_enough_memory((size) >> PAGE_SHIFT))
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
this.name = name;
this.len = strlen(name);
@@ -1292,7 +1305,7 @@
root = tmpfs_fs_type.kern_mnt->mnt_root;
dentry = d_alloc(root, );
if (!dentry)
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
error = -ENFILE;
file = get_empty_filp();
@@ -1318,7 +1331,6 @@
put_filp(file);
 put_dentry:
dput (dentry);
-out:
return ERR_PTR(error);  
 }
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] tmpfs fixes against 2.4.6-pre

2001-07-03 Thread Christoph Rohland

Hi Linus,

I split up my previous patch into two. Hopefully this is more
acceptable for you or will trigger some comments.

This is the first part:

1) shmem_remount_fs garbles parameters which are not supplied
2) shmem_truncate should check the maximum size else we get ugly
   oopses
3) shmem_file_setup should give an error if the size is too big. So
   the application will fail early. I also cleaned up the error
   handling a bit. 
4) We should recalculate the inode on page allocation. Else we get
   really weird sizes on sparse files.

Please apply
Christoph

diff -uNr 6-pre8/mm/shmem.c 6-pre8-fix1/mm/shmem.c
--- 6-pre8/mm/shmem.c   Tue Jun 12 09:49:28 2001
+++ 6-pre8-fix1/mm/shmem.c  Tue Jul  3 08:55:20 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -33,7 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
-#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
@@ -193,7 +194,14 @@
}
 
 out:
-   info-max_index = index;
+   /*
+* We have no chance to give an error, so we limit it to max
+* size here and the application will fail later
+*/
+   if (index  SHMEM_MAX_BLOCKS) 
+   info-max_index = SHMEM_MAX_BLOCKS;
+   else
+   info-max_index = index;
info-swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (info-lock);
@@ -314,6 +322,7 @@
return page;
}

+   shmem_recalc_inode(inode);
if (entry-val) {
unsigned long flags;
 
@@ -1027,6 +1036,8 @@
unsigned long max_inodes, inodes;
struct shmem_sb_info *info = sb-u.shmem_sb;
 
+   max_blocks = info-max_blocks;
+   max_inodes = info-max_inodes;
if (shmem_parse_options (data, NULL, max_blocks, max_inodes))
return -EINVAL;
 
@@ -1074,7 +1085,7 @@
sb-u.shmem_sb.free_blocks = blocks;
sb-u.shmem_sb.max_inodes = inodes;
sb-u.shmem_sb.free_inodes = inodes;
-   sb-s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + 
(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE))  PAGE_CACHE_SHIFT;
+   sb-s_maxbytes = (unsigned long long)SHMEM_MAX_BLOCKS  PAGE_CACHE_SHIFT;
sb-s_blocksize = PAGE_CACHE_SIZE;
sb-s_blocksize_bits = PAGE_CACHE_SHIFT;
sb-s_magic = TMPFS_MAGIC;
@@ -1282,9 +1293,11 @@
struct qstr this;
int vm_enough_memory(long pages);
 
-   error = -ENOMEM;
+   if (size  (unsigned long long) SHMEM_MAX_BLOCKS  PAGE_CACHE_SHIFT)
+   return ERR_PTR(-EINVAL);
+
if (!vm_enough_memory((size)  PAGE_SHIFT))
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
this.name = name;
this.len = strlen(name);
@@ -1292,7 +1305,7 @@
root = tmpfs_fs_type.kern_mnt-mnt_root;
dentry = d_alloc(root, this);
if (!dentry)
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
error = -ENFILE;
file = get_empty_filp();
@@ -1318,7 +1331,6 @@
put_filp(file);
 put_dentry:
dput (dentry);
-out:
return ERR_PTR(error);  
 }
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] tmpfs fixes against 2.4.6-pre(2)

2001-07-03 Thread Christoph Rohland

Hi Linus,

This is the second part of my patches.

Writing out of a mapping of a tmpfs file into the same file can
deadlock. This fix has been running in the -ac series for some while.

Please apply
Christoph

diff -uNr 6-pre8-fix1/include/linux/shmem_fs.h 6-pre8-fix2/include/linux/shmem_fs.h
--- 6-pre8-fix1/include/linux/shmem_fs.hSun Apr 29 20:33:00 2001
+++ 6-pre8-fix2/include/linux/shmem_fs.hTue Jul  3 09:28:13 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 6-pre8-fix1/mm/shmem.c 6-pre8-fix2/mm/shmem.c
--- 6-pre8-fix1/mm/shmem.c  Tue Jul  3 08:55:20 2001
+++ 6-pre8-fix2/mm/shmem.c  Tue Jul  3 10:09:26 2001
@@ -162,6 +162,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = inode-u.shmem_i;
 
+   down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
spin_lock (info-lock);
index = (inode-i_size + PAGE_CACHE_SIZE - 1)  PAGE_CACHE_SHIFT;
@@ -205,6 +206,7 @@
info-swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (info-lock);
+   up(info-sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -289,15 +291,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode-i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = inode-u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -402,6 +401,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode-i_mapping;
int error;
 
@@ -416,27 +416,28 @@
page_cache_release(*ptr);
}
 
-   down (inode-i_sem);
-   /* retest we may have slept */
+   info = inode-u.shmem_i;
+   down (info-sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode-i_size  (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(inode-u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (inode-i_sem);
+   up (info-sem);
return 0;
 failed:
-   up (inode-i_sem);
+   up (info-sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (inode-i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -509,6 +510,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (sb-u.shmem_sb.stat_lock);
if (!sb-u.shmem_sb.free_inodes) {
@@ -528,7 +530,9 @@
inode-i_rdev = NODEV;
inode-i_mapping-a_ops = shmem_aops;
inode-i_atime = inode-i_mtime = inode-i_ctime = CURRENT_TIME;
-   spin_lock_init (inode-u.shmem_i.lock);
+   info = inode-u.shmem_i;
+   spin_lock_init (info-lock);
+   sema_init (info-sem, 1);
switch (mode  S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -558,6 +562,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
struct inode*inode = file-f_dentry-d_inode; 
+   struct shmem_inode_info *info;
unsigned long   limit = current-rlim[RLIMIT_FSIZE].rlim_cur;
loff_t  pos;
struct page *page;
@@ -633,7 +638,11 @@
__get_user(dummy, buf+bytes-1);
}
 
-   page = shmem_getpage_locked(inode, index);
+   info = inode-u.shmem_i;
+   down (info-sem);
+   page = shmem_getpage_locked(info, inode, index);
+   up (info-sem);
+
status = PTR_ERR(page);
if (IS_ERR(page))
break;
@@ -644,7 +653,6 @@
}
 
kaddr = kmap(page);

[Patch] tmpfs/ramfs accounting

2001-07-02 Thread Christoph Rohland

Hi Alan,

here is the patch you backed out for -ac22.

I slightly changed the approach: I do not rely on removepage to
calculate the fs size any more since the special-casing was ugly and
PG_marker was dropped. But I use removepage for the shmem_nrpages
calculation.

Please apply
Christoph

diff -uNr 5-ac22/fs/ramfs/inode.c 5-ac22-fix/fs/ramfs/inode.c
--- 5-ac22/fs/ramfs/inode.c Mon Jul  2 09:13:18 2001
+++ 5-ac22-fix/fs/ramfs/inode.c Mon Jul  2 09:55:52 2001
@@ -289,7 +289,7 @@
return 0;
 }
 
-static void ramfs_truncatepage(struct page *page)
+static void ramfs_removepage(struct page *page)
 {
struct inode *inode = (struct inode *)page->mapping->host;
 
@@ -659,7 +659,7 @@
writepage:  ramfs_writepage,
prepare_write:  ramfs_prepare_write,
commit_write:   ramfs_commit_write,
-   truncatepage:   ramfs_truncatepage,
+   removepage: ramfs_removepage,
 };
 
 static struct file_operations ramfs_file_operations = {
diff -uNr 5-ac22/include/linux/fs.h 5-ac22-fix/include/linux/fs.h
--- 5-ac22/include/linux/fs.h   Mon Jul  2 09:35:39 2001
+++ 5-ac22-fix/include/linux/fs.h   Mon Jul  2 10:32:04 2001
@@ -375,7 +375,7 @@
int (*sync_page)(struct page *);
int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-   void (*truncatepage)(struct page *); /* called from truncate_complete_page */
+   void (*removepage)(struct page *); /* called when page gets removed from the 
+inode */
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
int (*bmap)(struct address_space *, long);
 };
diff -uNr 5-ac22/mm/filemap.c 5-ac22-fix/mm/filemap.c
--- 5-ac22/mm/filemap.c Mon Jul  2 09:13:29 2001
+++ 5-ac22-fix/mm/filemap.c Mon Jul  2 10:22:52 2001
@@ -87,6 +87,9 @@
 {
struct address_space * mapping = page->mapping;
 
+   if (mapping->a_ops->removepage)
+   mapping->a_ops->removepage(page);
+   
mapping->nrpages--;
list_del(>list);
page->mapping = NULL;
@@ -211,9 +214,6 @@
if (!page->buffers || block_flushpage(page, 0))
lru_cache_del(page);
 
-   if (page->mapping->a_ops->truncatepage)
-   page->mapping->a_ops->truncatepage(page);
-   
/*
 * We remove the page from the page cache _after_ we have
 * destroyed all buffer-cache references to it. Otherwise some
diff -uNr 5-ac22/mm/shmem.c 5-ac22-fix/mm/shmem.c
--- 5-ac22/mm/shmem.c   Mon Jul  2 09:13:29 2001
+++ 5-ac22-fix/mm/shmem.c   Mon Jul  2 10:54:55 2001
@@ -34,6 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 #define SHMEM_SB(sb) (>u.shmem_sb)
 
@@ -51,6 +52,11 @@
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
+static void shmem_removepage(struct page *page)
+{
+   atomic_dec(_nrpages);
+}
+
 /*
  * shmem_recalc_inode - recalculate the size of an inode
  *
@@ -69,11 +75,9 @@
  * (inode->i_mapping->nrpages + info->swapped)
  *
  * It has to be called with the spinlock held.
- *
- * The swap parameter is a performance hack for truncate.
  */
 
-static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
+static void shmem_recalc_inode(struct inode * inode)
 {
unsigned long freed;
 
@@ -85,7 +89,6 @@
spin_lock (>stat_lock);
sbinfo->free_blocks += freed;
spin_unlock (>stat_lock);
-   atomic_sub(freed-swap, _nrpages);
}
 }
 
@@ -202,7 +205,7 @@
 out:
info->max_index = index;
info->swapped -= freed;
-   shmem_recalc_inode(inode, freed);
+   shmem_recalc_inode(inode);
spin_unlock (>lock);
up(>sem);
 }
@@ -257,7 +260,7 @@
entry = shmem_swp_entry(info, page->index);
if (IS_ERR(entry))  /* this had been allocted on page allocation */
BUG();
-   shmem_recalc_inode(page->mapping->host, 0);
+   shmem_recalc_inode(page->mapping->host);
error = -EAGAIN;
if (entry->val)
BUG();
@@ -265,7 +268,6 @@
*entry = swap;
error = 0;
/* Remove the page from the page cache */
-   atomic_dec(_nrpages);
lru_cache_del(page);
remove_inode_page(page);
 
@@ -1086,6 +1088,8 @@
unsigned long max_inodes, inodes;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+   max_blocks = sbinfo->max_blocks;
+   max_inodes = sbinfo->max_inodes;
if (shmem_parse_options (data, NULL, _blocks, _inodes))
return -EINVAL;
 
@@ -1134,7 +1138,7 @@
sbinfo->free_blocks = blocks;
sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes;
-   sb->s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + 

[Patch] tmpfs/ramfs accounting

2001-07-02 Thread Christoph Rohland

Hi Alan,

here is the patch you backed out for -ac22.

I slightly changed the approach: I do not rely on removepage to
calculate the fs size any more since the special-casing was ugly and
PG_marker was dropped. But I use removepage for the shmem_nrpages
calculation.

Please apply
Christoph

diff -uNr 5-ac22/fs/ramfs/inode.c 5-ac22-fix/fs/ramfs/inode.c
--- 5-ac22/fs/ramfs/inode.c Mon Jul  2 09:13:18 2001
+++ 5-ac22-fix/fs/ramfs/inode.c Mon Jul  2 09:55:52 2001
@@ -289,7 +289,7 @@
return 0;
 }
 
-static void ramfs_truncatepage(struct page *page)
+static void ramfs_removepage(struct page *page)
 {
struct inode *inode = (struct inode *)page-mapping-host;
 
@@ -659,7 +659,7 @@
writepage:  ramfs_writepage,
prepare_write:  ramfs_prepare_write,
commit_write:   ramfs_commit_write,
-   truncatepage:   ramfs_truncatepage,
+   removepage: ramfs_removepage,
 };
 
 static struct file_operations ramfs_file_operations = {
diff -uNr 5-ac22/include/linux/fs.h 5-ac22-fix/include/linux/fs.h
--- 5-ac22/include/linux/fs.h   Mon Jul  2 09:35:39 2001
+++ 5-ac22-fix/include/linux/fs.h   Mon Jul  2 10:32:04 2001
@@ -375,7 +375,7 @@
int (*sync_page)(struct page *);
int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-   void (*truncatepage)(struct page *); /* called from truncate_complete_page */
+   void (*removepage)(struct page *); /* called when page gets removed from the 
+inode */
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
int (*bmap)(struct address_space *, long);
 };
diff -uNr 5-ac22/mm/filemap.c 5-ac22-fix/mm/filemap.c
--- 5-ac22/mm/filemap.c Mon Jul  2 09:13:29 2001
+++ 5-ac22-fix/mm/filemap.c Mon Jul  2 10:22:52 2001
@@ -87,6 +87,9 @@
 {
struct address_space * mapping = page-mapping;
 
+   if (mapping-a_ops-removepage)
+   mapping-a_ops-removepage(page);
+   
mapping-nrpages--;
list_del(page-list);
page-mapping = NULL;
@@ -211,9 +214,6 @@
if (!page-buffers || block_flushpage(page, 0))
lru_cache_del(page);
 
-   if (page-mapping-a_ops-truncatepage)
-   page-mapping-a_ops-truncatepage(page);
-   
/*
 * We remove the page from the page cache _after_ we have
 * destroyed all buffer-cache references to it. Otherwise some
diff -uNr 5-ac22/mm/shmem.c 5-ac22-fix/mm/shmem.c
--- 5-ac22/mm/shmem.c   Mon Jul  2 09:13:29 2001
+++ 5-ac22-fix/mm/shmem.c   Mon Jul  2 10:54:55 2001
@@ -34,6 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 #define SHMEM_SB(sb) (sb-u.shmem_sb)
 
@@ -51,6 +52,11 @@
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
+static void shmem_removepage(struct page *page)
+{
+   atomic_dec(shmem_nrpages);
+}
+
 /*
  * shmem_recalc_inode - recalculate the size of an inode
  *
@@ -69,11 +75,9 @@
  * (inode-i_mapping-nrpages + info-swapped)
  *
  * It has to be called with the spinlock held.
- *
- * The swap parameter is a performance hack for truncate.
  */
 
-static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
+static void shmem_recalc_inode(struct inode * inode)
 {
unsigned long freed;
 
@@ -85,7 +89,6 @@
spin_lock (sbinfo-stat_lock);
sbinfo-free_blocks += freed;
spin_unlock (sbinfo-stat_lock);
-   atomic_sub(freed-swap, shmem_nrpages);
}
 }
 
@@ -202,7 +205,7 @@
 out:
info-max_index = index;
info-swapped -= freed;
-   shmem_recalc_inode(inode, freed);
+   shmem_recalc_inode(inode);
spin_unlock (info-lock);
up(info-sem);
 }
@@ -257,7 +260,7 @@
entry = shmem_swp_entry(info, page-index);
if (IS_ERR(entry))  /* this had been allocted on page allocation */
BUG();
-   shmem_recalc_inode(page-mapping-host, 0);
+   shmem_recalc_inode(page-mapping-host);
error = -EAGAIN;
if (entry-val)
BUG();
@@ -265,7 +268,6 @@
*entry = swap;
error = 0;
/* Remove the page from the page cache */
-   atomic_dec(shmem_nrpages);
lru_cache_del(page);
remove_inode_page(page);
 
@@ -1086,6 +1088,8 @@
unsigned long max_inodes, inodes;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+   max_blocks = sbinfo-max_blocks;
+   max_inodes = sbinfo-max_inodes;
if (shmem_parse_options (data, NULL, max_blocks, max_inodes))
return -EINVAL;
 
@@ -1134,7 +1138,7 @@
sbinfo-free_blocks = blocks;
sbinfo-max_inodes = inodes;
sbinfo-free_inodes = inodes;
-   sb-s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + 

Re: Shared memory quantity not being reflected by /proc/meminfo

2001-06-25 Thread Christoph Rohland

Hi Allan,

On Sun, 24 Jun 2001, Allan Duncan wrote:
> OK, it's fine by me if the "shared" under 2.2.x is not the same,
> however in that case the field should not appear at all in meminfo,
> rather than the current zero value, which leads lesser kernel
> hackers like me up the garden path.

This would probably break a lot of user space apps.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Shared memory quantity not being reflected by /proc/meminfo

2001-06-25 Thread Christoph Rohland

Hi Allan,

On Sun, 24 Jun 2001, Allan Duncan wrote:
> OK, it's fine by me if the "shared" under 2.2.x is not the same,
> however in that case the field should not appear at all in meminfo,
> rather than the current zero value, which leads lesser kernel
> hackers like me up the garden path.

This would probably break a lot of user space apps.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Shared memory quantity not being reflected by /proc/meminfo

2001-06-24 Thread Christoph Rohland

Hi Albert,

On Sat, 23 Jun 2001, Albert D. Cahalan wrote:
> You misunderstood what 2.2.xx kernels were reporting.
> The "shared" memory in /proc/meminfo refers to something
> completely unrelated to SysV shared memory. This is no
> longer calculated because the computation was too costly.

But the load of misinterpretations and the missing value led me to
export the number of shmem pages in later -ac kernels exactly in this
field.

I know it is a change of semantics, and because of this both Alan and
I asked for comments on whether this change is appreciated. I am still
waiting for responses though.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Shared memory quantity not being reflected by /proc/meminfo

2001-06-24 Thread Christoph Rohland

Hi Albert,

On Sat, 23 Jun 2001, Albert D. Cahalan wrote:
> You misunderstood what 2.2.xx kernels were reporting.
> The "shared" memory in /proc/meminfo refers to something
> completely unrelated to SysV shared memory. This is no
> longer calculated because the computation was too costly.

But the load of misinterpretations and the missing value led me to
export the number of shmem pages in later -ac kernels exactly in this
field.

I know it is a change of semantics, and because of this both Alan and
I asked for comments on whether this change is appreciated. I am still
waiting for responses though.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] tmpfs fixes against 2.4.6-pre

2001-06-21 Thread Christoph Rohland

Hi Linus,

the appended patch fixes several tmpfs problems:

1) writing out of a mapping of a tmpfs file into the same file can
   deadlock
2) shmem_remount_fs garbles parameters which are not supplied
3) shmem_file_setup should check the maximum size

Please apply
Christoph

diff -uNr 6-pre5/include/linux/shmem_fs.h 6-pre5-fix/include/linux/shmem_fs.h
--- 6-pre5/include/linux/shmem_fs.h Sun Apr 29 20:33:00 2001
+++ 6-pre5-fix/include/linux/shmem_fs.h Thu Jun 21 15:52:25 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 6-pre5/mm/shmem.c 6-pre5-fix/mm/shmem.c
--- 6-pre5/mm/shmem.c   Tue Jun 12 09:49:28 2001
+++ 6-pre5-fix/mm/shmem.c   Thu Jun 21 15:52:26 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -33,7 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
-#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
@@ -161,6 +162,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = >u.shmem_i;
 
+   down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (>lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -193,10 +195,14 @@
}
 
 out:
-   info->max_index = index;
+   if (index <= SHMEM_MAX_BLOCKS)
+   info->max_index = index;
+   else
+   info->max_index = SHMEM_MAX_BLOCKS + 1;
info->swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (>lock);
+   up(>sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +287,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode->i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = >u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -393,6 +396,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode->i_mapping;
int error;
 
@@ -407,27 +411,28 @@
page_cache_release(*ptr);
}
 
-   down (>i_sem);
-   /* retest we may have slept */
+   info = >u.shmem_i;
+   down (>sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(>u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (>i_sem);
+   up (>sem);
return 0;
 failed:
-   up (>i_sem);
+   up (>sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (>i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -500,6 +505,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (>u.shmem_sb.stat_lock);
if (!sb->u.shmem_sb.free_inodes) {
@@ -519,7 +525,9 @@
inode->i_rdev = NODEV;
inode->i_mapping->a_ops = _aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-   spin_lock_init (>u.shmem_i.lock);
+   info = >u.shmem_i;
+   spin_lock_init (>lock);
+   sema_init (>sem, 1);
switch (mode & S_IFMT) {
   

Re: Linux 2.4.5-ac16

2001-06-21 Thread Christoph Rohland

Hi Alan,

On Tue, 19 Jun 2001, Alan Cox wrote:
> 2.4.5-ac16
> o Drop the shmem/removepage changes to see if they(me)
>   are causing the instabilities in ac15

Any conclusions on that? 

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] tmpfs fixes against 2.4.6-pre

2001-06-21 Thread Christoph Rohland

Hi Linus,

the appended patch fixes several tmpfs problems:

1) writing out of a mapping of a tmpfs file into the same file can
   deadlock
2) shmem_remount_fs garbles parameters which are not supplied
3) shmem_file_setup should check the maximum size

Please apply
Christoph

diff -uNr 6-pre5/include/linux/shmem_fs.h 6-pre5-fix/include/linux/shmem_fs.h
--- 6-pre5/include/linux/shmem_fs.h Sun Apr 29 20:33:00 2001
+++ 6-pre5-fix/include/linux/shmem_fs.h Thu Jun 21 15:52:25 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 6-pre5/mm/shmem.c 6-pre5-fix/mm/shmem.c
--- 6-pre5/mm/shmem.c   Tue Jun 12 09:49:28 2001
+++ 6-pre5-fix/mm/shmem.c   Thu Jun 21 15:52:26 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -33,7 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
-#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
@@ -161,6 +162,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = inode-u.shmem_i;
 
+   down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
spin_lock (info-lock);
index = (inode-i_size + PAGE_CACHE_SIZE - 1)  PAGE_CACHE_SHIFT;
@@ -193,10 +195,14 @@
}
 
 out:
-   info-max_index = index;
+   if (index = SHMEM_MAX_BLOCKS)
+   info-max_index = index;
+   else
+   info-max_index = SHMEM_MAX_BLOCKS + 1;
info-swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (info-lock);
+   up(info-sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +287,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode-i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = inode-u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -393,6 +396,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode-i_mapping;
int error;
 
@@ -407,27 +411,28 @@
page_cache_release(*ptr);
}
 
-   down (inode-i_sem);
-   /* retest we may have slept */
+   info = inode-u.shmem_i;
+   down (info-sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode-i_size  (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(inode-u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (inode-i_sem);
+   up (info-sem);
return 0;
 failed:
-   up (inode-i_sem);
+   up (info-sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (inode-i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -500,6 +505,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (sb-u.shmem_sb.stat_lock);
if (!sb-u.shmem_sb.free_inodes) {
@@ -519,7 +525,9 @@
inode-i_rdev = NODEV;
inode-i_mapping-a_ops = shmem_aops;
inode-i_atime = inode-i_mtime = inode-i_ctime = CURRENT_TIME;
-   spin_lock_init (inode-u.shmem_i.lock);
+   info = inode-u.shmem_i;
+   spin_lock_init (info-lock);
+   sema_init (info-sem, 1);
switch (mode  S_IFMT) {
default:
init_special_inode

Re: Linux 2.4.5-ac14

2001-06-15 Thread Christoph Rohland

Hi Dieter,

On Fri, 15 Jun 2001, Dieter Nützel wrote:
> I see 4.29 GB under shm with your latest try.
> something wrong?

Yes, this is nasty. The appended patch fixes that. (I am not really
happy to need the PG_marker flag for writepage.)

The patch also fixes two other problems:
- shmem_file_setup has to check the given size. Else we can corrupt
  kernel memory on 64bit machines. (Thanks to Oliver Paukstadt for
  detecting this)
- shmem_remount_fs does not initialize the parameters and thus
  corrupts the sizes (detected by Joris van Rantwijk)

Alan, please apply.

Greetings
Christoph

diff -uNr 5-ac14/include/linux/mm.h 5-ac14-fix/include/linux/mm.h
--- 5-ac14/include/linux/mm.h   Fri Jun 15 10:37:21 2001
+++ 5-ac14-fix/include/linux/mm.h   Fri Jun 15 11:24:06 2001
@@ -357,6 +357,7 @@
 
 #define PageMarker(page)   test_bit(PG_marker, &(page)->flags)
 #define SetPageMarker(page)set_bit(PG_marker, &(page)->flags)
+#define ClearPageMarker(page)  clear_bit(PG_marker, &(page)->flags)
 
 #ifdef CONFIG_HIGHMEM
 #define PageHighMem(page)  test_bit(PG_highmem, &(page)->flags)
diff -uNr 5-ac14/mm/shmem.c 5-ac14-fix/mm/shmem.c
--- 5-ac14/mm/shmem.c   Fri Jun 15 10:09:21 2001
+++ 5-ac14-fix/mm/shmem.c   Fri Jun 15 11:37:44 2001
@@ -34,6 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 #define SHMEM_SB(sb) (>u.shmem_sb)
 
@@ -56,10 +57,12 @@
struct inode *inode = (struct inode *)page->mapping->host;
struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
 
-   inode->i_blocks -= BLOCKS_PER_PAGE;
-   spin_lock (>stat_lock);
-   sbinfo->free_blocks++;
-   spin_unlock (>stat_lock);
+   if (!PageMarker(page)) {
+   inode->i_blocks -= BLOCKS_PER_PAGE;
+   spin_lock (>stat_lock);
+   sbinfo->free_blocks++;
+   spin_unlock (>stat_lock);
+   }
atomic_dec(_nrpages);
 }
 
@@ -241,9 +244,10 @@
*entry = swap;
error = 0;
/* Remove the page from the page cache */
-   atomic_dec(_nrpages);
lru_cache_del(page);
+   SetPageMarker(page);
remove_inode_page(page);
+   ClearPageMarker(page);
 
/* Add it to the swap cache */
add_to_swap_cache(page, swap);
@@ -1062,6 +1066,8 @@
unsigned long max_inodes, inodes;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+   max_blocks = sbinfo->max_blocks;
+   max_inodes = sbinfo->max_inodes;
if (shmem_parse_options (data, NULL, _blocks, _inodes))
return -EINVAL;
 
@@ -1110,7 +1116,7 @@
sbinfo->free_blocks = blocks;
sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes;
-   sb->s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + 
(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)) << PAGE_CACHE_SHIFT;
+   sb->s_maxbytes = (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = TMPFS_MAGIC;
@@ -1311,9 +1317,11 @@
struct qstr this;
int vm_enough_memory(long pages);
 
-   error = -ENOMEM;
+   if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT)
+   return ERR_PTR(-EINVAL);
+
if (!vm_enough_memory((size) >> PAGE_SHIFT))
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
this.name = name;
this.len = strlen(name);
@@ -1321,7 +1329,7 @@
root = tmpfs_fs_type.kern_mnt->mnt_root;
dentry = d_alloc(root, );
if (!dentry)
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
error = -ENFILE;
file = get_empty_filp();
@@ -1347,7 +1355,6 @@
put_filp(file);
 put_dentry:
dput (dentry);
-out:
return ERR_PTR(error);  
 }
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Linux 2.4.5-ac14

2001-06-15 Thread Christoph Rohland

Hi Dieter,

On Fri, 15 Jun 2001, Dieter Nützel wrote:
> I see 4.29 GB under shm with your latest try.
> something wrong?

Yes, this is nasty. The appended patch fixes that. (I am not really
happy to need the PG_marker flag for writepage.)

The patch also fixes two other problems:
- shmem_file_setup has to check the given size. Else we can corrupt
  kernel memory on 64bit machines. (Thanks to Oliver Paukstadt for
  detecting this)
- shmem_remount_fs does not initialize the parameters and thus
  corrupts the sizes (detected by Joris van Rantwijk)

Alan, please apply.

Greetings
Christoph

diff -uNr 5-ac14/include/linux/mm.h 5-ac14-fix/include/linux/mm.h
--- 5-ac14/include/linux/mm.h   Fri Jun 15 10:37:21 2001
+++ 5-ac14-fix/include/linux/mm.h   Fri Jun 15 11:24:06 2001
@@ -357,6 +357,7 @@
 
 #define PageMarker(page)   test_bit(PG_marker, (page)-flags)
 #define SetPageMarker(page)set_bit(PG_marker, (page)-flags)
+#define ClearPageMarker(page)  clear_bit(PG_marker, (page)-flags)
 
 #ifdef CONFIG_HIGHMEM
 #define PageHighMem(page)  test_bit(PG_highmem, (page)-flags)
diff -uNr 5-ac14/mm/shmem.c 5-ac14-fix/mm/shmem.c
--- 5-ac14/mm/shmem.c   Fri Jun 15 10:09:21 2001
+++ 5-ac14-fix/mm/shmem.c   Fri Jun 15 11:37:44 2001
@@ -34,6 +34,7 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
 #define SHMEM_SB(sb) (sb-u.shmem_sb)
 
@@ -56,10 +57,12 @@
struct inode *inode = (struct inode *)page-mapping-host;
struct shmem_sb_info * sbinfo = SHMEM_SB(inode-i_sb);
 
-   inode-i_blocks -= BLOCKS_PER_PAGE;
-   spin_lock (sbinfo-stat_lock);
-   sbinfo-free_blocks++;
-   spin_unlock (sbinfo-stat_lock);
+   if (!PageMarker(page)) {
+   inode-i_blocks -= BLOCKS_PER_PAGE;
+   spin_lock (sbinfo-stat_lock);
+   sbinfo-free_blocks++;
+   spin_unlock (sbinfo-stat_lock);
+   }
atomic_dec(shmem_nrpages);
 }
 
@@ -241,9 +244,10 @@
*entry = swap;
error = 0;
/* Remove the page from the page cache */
-   atomic_dec(shmem_nrpages);
lru_cache_del(page);
+   SetPageMarker(page);
remove_inode_page(page);
+   ClearPageMarker(page);
 
/* Add it to the swap cache */
add_to_swap_cache(page, swap);
@@ -1062,6 +1066,8 @@
unsigned long max_inodes, inodes;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+   max_blocks = sbinfo-max_blocks;
+   max_inodes = sbinfo-max_inodes;
if (shmem_parse_options (data, NULL, max_blocks, max_inodes))
return -EINVAL;
 
@@ -1110,7 +1116,7 @@
sbinfo-free_blocks = blocks;
sbinfo-max_inodes = inodes;
sbinfo-free_inodes = inodes;
-   sb-s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + 
(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE))  PAGE_CACHE_SHIFT;
+   sb-s_maxbytes = (unsigned long long) SHMEM_MAX_BLOCKS  PAGE_CACHE_SHIFT;
sb-s_blocksize = PAGE_CACHE_SIZE;
sb-s_blocksize_bits = PAGE_CACHE_SHIFT;
sb-s_magic = TMPFS_MAGIC;
@@ -1311,9 +1317,11 @@
struct qstr this;
int vm_enough_memory(long pages);
 
-   error = -ENOMEM;
+   if (size  (unsigned long long) SHMEM_MAX_BLOCKS  PAGE_CACHE_SHIFT)
+   return ERR_PTR(-EINVAL);
+
if (!vm_enough_memory((size)  PAGE_SHIFT))
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
this.name = name;
this.len = strlen(name);
@@ -1321,7 +1329,7 @@
root = tmpfs_fs_type.kern_mnt-mnt_root;
dentry = d_alloc(root, this);
if (!dentry)
-   goto out;
+   return ERR_PTR(-ENOMEM);
 
error = -ENFILE;
file = get_empty_filp();
@@ -1347,7 +1355,6 @@
put_filp(file);
 put_dentry:
dput (dentry);
-out:
return ERR_PTR(error);  
 }
 /*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] 2.4.5-ac13 ramfs and tmpfs accounting

2001-06-13 Thread Christoph Rohland

Hi Alan,

ramfs accounting does not get notified when a clean page gets dropped
from the inode.

Also tmpfs should use the new function to do accurate accounting. Else
the cached field in -ac will get spurious negative values.

The following patch fixes both.

Greetings
Christoph

diff -uNr 5-ac13/fs/ramfs/inode.c 5-ac13-a/fs/ramfs/inode.c
--- 5-ac13/fs/ramfs/inode.c Tue Jun 12 09:51:39 2001
+++ 5-ac13-a/fs/ramfs/inode.c   Wed Jun 13 09:54:22 2001
@@ -289,7 +289,7 @@
return 0;
 }
 
-static void ramfs_truncatepage(struct page *page)
+static void ramfs_removepage(struct page *page)
 {
struct inode *inode = (struct inode *)page->mapping->host;
 
@@ -659,7 +659,7 @@
writepage:  ramfs_writepage,
prepare_write:  ramfs_prepare_write,
commit_write:   ramfs_commit_write,
-   truncatepage:   ramfs_truncatepage,
+   removepage: ramfs_removepage,
 };
 
 static struct file_operations ramfs_file_operations = {
diff -uNr 5-ac13/include/linux/fs.h 5-ac13-a/include/linux/fs.h
--- 5-ac13/include/linux/fs.h   Tue Jun 12 17:34:25 2001
+++ 5-ac13-a/include/linux/fs.h Wed Jun 13 10:23:48 2001
@@ -368,7 +368,7 @@
int (*sync_page)(struct page *);
int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-   void (*truncatepage)(struct page *); /* called from truncate_complete_page */
+   void (*removepage)(struct page *); /* called when page gets removed from the 
+inode */
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
int (*bmap)(struct address_space *, long);
 };
diff -uNr 5-ac13/mm/filemap.c 5-ac13-a/mm/filemap.c
--- 5-ac13/mm/filemap.c Tue Jun 12 09:51:45 2001
+++ 5-ac13-a/mm/filemap.c   Wed Jun 13 09:56:43 2001
@@ -82,6 +82,9 @@
 {
struct address_space * mapping = page->mapping;
 
+   if (mapping->a_ops->removepage)
+   mapping->a_ops->removepage(page);
+   
mapping->nrpages--;
list_del(>list);
page->mapping = NULL;
@@ -206,9 +209,6 @@
if (!page->buffers || block_flushpage(page, 0))
lru_cache_del(page);
 
-   if (page->mapping->a_ops->truncatepage)
-   page->mapping->a_ops->truncatepage(page);
-   
/*
 * We remove the page from the page cache _after_ we have
 * destroyed all buffer-cache references to it. Otherwise some
diff -uNr 5-ac13/mm/shmem.c 5-ac13-a/mm/shmem.c
--- 5-ac13/mm/shmem.c   Tue Jun 12 09:51:45 2001
+++ 5-ac13-a/mm/shmem.c Wed Jun 13 09:56:20 2001
@@ -51,42 +51,16 @@
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
-/*
- * shmem_recalc_inode - recalculate the size of an inode
- *
- * @inode: inode to recalc
- * @swap:  additional swap pages freed externally
- *
- * We have to calculate the free blocks since the mm can drop pages
- * behind our back
- *
- * But we know that normally
- * inodes->i_blocks/BLOCKS_PER_PAGE == 
- * inode->i_mapping->nrpages + info->swapped
- *
- * So the mm freed 
- * inodes->i_blocks/BLOCKS_PER_PAGE - 
- * (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
- *
- * The swap parameter is a performance hack for truncate.
- */
-
-static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
+static void shmem_removepage(struct page *page)
 {
-   unsigned long freed;
+   struct inode *inode = (struct inode *)page->mapping->host;
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
 
-   freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
-   (inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
-   if (freed){
-   struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
-   inode->i_blocks -= freed*BLOCKS_PER_PAGE;
-   spin_lock (>stat_lock);
-   sbinfo->free_blocks += freed;
-   spin_unlock (>stat_lock);
-   atomic_sub(freed-swap, _nrpages);
-   }
+   inode->i_blocks -= BLOCKS_PER_PAGE;
+   spin_lock (>stat_lock);
+   sbinfo->free_blocks++;
+   spin_unlock (>stat_lock);
+   atomic_dec(_nrpages);
 }
 
 static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long 
index) 
@@ -166,6 +140,7 @@
unsigned long freed = 0;
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = SHMEM_I(inode);
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
 
down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -202,7 +177,9 @@
 out:
info->max_index = index;
info->swapped -= freed;
-   shmem_recalc_inode(inode, freed);
+   spin_lock(>stat_lock);
+   sbinfo->free_blocks += freed;
+   spin_unlock(>stat_lock);
spin_unlock (>lock);
up(>sem);
 }
@@ -257,7 +234,6 @@
entry = shmem_swp_entry(info, 

Re: DoS using tmpfs

2001-06-13 Thread Christoph Rohland

Hi Pavel,

On Fri, 8 Jun 2001, Pavel Roskin wrote:
> Hello!
> 
> It appears that a system with tmpfs mounted with the default (!!!)
> parameters can be used by ordinary users to make the system
> non-functional.

...

> 1) tmpfs, as opposed to ramfs doesn't limit the usage by
>default. It's not a good default for a filesystem designed for
>temporary files.

Yes, use the size parameter. And no, ramfs has no resource limits in
the stock kernel at all. In -ac it limits to half the size of the
physical RAM unconditionally. But that's not useful for tmpfs since
this uses swap also. So it is the admin's task to add a size
parameter. I would love to add a size parameter in percent of virtual
memory but this would need some changes in the swapon/off coding.

> 2) Not delivering SIGINT to processes is probably not the best
>behavior if the memory is low. However, one could argue that some
>processes would use even more resources if they get control with
>SIGINT.
> 
> 3) All swap in the system was exhausted and yet tmpfs didn't return
>ENOSPC to "dd".

That the kernel locks up is IMHO a mm fault. tmpfs allocates its pages
with GFP_USER and will return an error if this fails. Apparently it
never fails but locks up.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: DoS using tmpfs

2001-06-13 Thread Christoph Rohland

Hi Pavel,

On Fri, 8 Jun 2001, Pavel Roskin wrote:
> Hello!
> 
> It appears that a system with tmpfs mounted with the default (!!!)
> parameters can be used by ordinary users to make the system
> non-functional.

...

> 1) tmpfs, as opposed to ramfs doesn't limit the usage by
>default. It's not a good default for a filesystem designed for
>temporary files.

Yes, use the size parameter. And no, ramfs has no resource limits in
the stock kernel at all. In -ac it limits to half the size of the
physical RAM unconditionally. But that's not useful for tmpfs since
this uses swap also. So it is the admin's task to add a size
parameter. I would love to add a size parameter in percent of virtual
memory but this would need some changes in the swapon/off coding.

> 2) Not delivering SIGINT to processes is probably not the best
>behavior if the memory is low. However, one could argue that some
>processes would use even more resources if they get control with
>SIGINT.
> 
> 3) All swap in the system was exhausted and yet tmpfs didn't return
>ENOSPC to "dd".

That the kernel locks up is IMHO a mm fault. tmpfs allocates its pages
with GFP_USER and will return an error if this fails. Apparently it
never fails but locks up.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] 2.4.5-ac13 ramfs and tmpfs accounting

2001-06-13 Thread Christoph Rohland

Hi Alan,

ramfs accounting does not get notified when a clean page gets dropped
from the inode.

Also tmpfs should use the new function to do accurate accounting. Else
the cached field in -ac will get spurious negative values.

The following patch fixes both.

Greetings
Christoph

diff -uNr 5-ac13/fs/ramfs/inode.c 5-ac13-a/fs/ramfs/inode.c
--- 5-ac13/fs/ramfs/inode.c Tue Jun 12 09:51:39 2001
+++ 5-ac13-a/fs/ramfs/inode.c   Wed Jun 13 09:54:22 2001
@@ -289,7 +289,7 @@
return 0;
 }
 
-static void ramfs_truncatepage(struct page *page)
+static void ramfs_removepage(struct page *page)
 {
struct inode *inode = (struct inode *)page-mapping-host;
 
@@ -659,7 +659,7 @@
writepage:  ramfs_writepage,
prepare_write:  ramfs_prepare_write,
commit_write:   ramfs_commit_write,
-   truncatepage:   ramfs_truncatepage,
+   removepage: ramfs_removepage,
 };
 
 static struct file_operations ramfs_file_operations = {
diff -uNr 5-ac13/include/linux/fs.h 5-ac13-a/include/linux/fs.h
--- 5-ac13/include/linux/fs.h   Tue Jun 12 17:34:25 2001
+++ 5-ac13-a/include/linux/fs.h Wed Jun 13 10:23:48 2001
@@ -368,7 +368,7 @@
int (*sync_page)(struct page *);
int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-   void (*truncatepage)(struct page *); /* called from truncate_complete_page */
+   void (*removepage)(struct page *); /* called when page gets removed from the 
+inode */
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
int (*bmap)(struct address_space *, long);
 };
diff -uNr 5-ac13/mm/filemap.c 5-ac13-a/mm/filemap.c
--- 5-ac13/mm/filemap.c Tue Jun 12 09:51:45 2001
+++ 5-ac13-a/mm/filemap.c   Wed Jun 13 09:56:43 2001
@@ -82,6 +82,9 @@
 {
struct address_space * mapping = page-mapping;
 
+   if (mapping-a_ops-removepage)
+   mapping-a_ops-removepage(page);
+   
mapping-nrpages--;
list_del(page-list);
page-mapping = NULL;
@@ -206,9 +209,6 @@
if (!page-buffers || block_flushpage(page, 0))
lru_cache_del(page);
 
-   if (page-mapping-a_ops-truncatepage)
-   page-mapping-a_ops-truncatepage(page);
-   
/*
 * We remove the page from the page cache _after_ we have
 * destroyed all buffer-cache references to it. Otherwise some
diff -uNr 5-ac13/mm/shmem.c 5-ac13-a/mm/shmem.c
--- 5-ac13/mm/shmem.c   Tue Jun 12 09:51:45 2001
+++ 5-ac13-a/mm/shmem.c Wed Jun 13 09:56:20 2001
@@ -51,42 +51,16 @@
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
-/*
- * shmem_recalc_inode - recalculate the size of an inode
- *
- * @inode: inode to recalc
- * @swap:  additional swap pages freed externally
- *
- * We have to calculate the free blocks since the mm can drop pages
- * behind our back
- *
- * But we know that normally
- * inodes-i_blocks/BLOCKS_PER_PAGE == 
- * inode-i_mapping-nrpages + info-swapped
- *
- * So the mm freed 
- * inodes-i_blocks/BLOCKS_PER_PAGE - 
- * (inode-i_mapping-nrpages + info-swapped)
- *
- * It has to be called with the spinlock held.
- *
- * The swap parameter is a performance hack for truncate.
- */
-
-static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
+static void shmem_removepage(struct page *page)
 {
-   unsigned long freed;
+   struct inode *inode = (struct inode *)page-mapping-host;
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode-i_sb);
 
-   freed = (inode-i_blocks/BLOCKS_PER_PAGE) -
-   (inode-i_mapping-nrpages + SHMEM_I(inode)-swapped);
-   if (freed){
-   struct shmem_sb_info * sbinfo = SHMEM_SB(inode-i_sb);
-   inode-i_blocks -= freed*BLOCKS_PER_PAGE;
-   spin_lock (sbinfo-stat_lock);
-   sbinfo-free_blocks += freed;
-   spin_unlock (sbinfo-stat_lock);
-   atomic_sub(freed-swap, shmem_nrpages);
-   }
+   inode-i_blocks -= BLOCKS_PER_PAGE;
+   spin_lock (sbinfo-stat_lock);
+   sbinfo-free_blocks++;
+   spin_unlock (sbinfo-stat_lock);
+   atomic_dec(shmem_nrpages);
 }
 
 static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long 
index) 
@@ -166,6 +140,7 @@
unsigned long freed = 0;
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = SHMEM_I(inode);
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode-i_sb);
 
down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
@@ -202,7 +177,9 @@
 out:
info-max_index = index;
info-swapped -= freed;
-   shmem_recalc_inode(inode, freed);
+   spin_lock(sbinfo-stat_lock);
+   sbinfo-free_blocks += freed;
+   spin_unlock(sbinfo-stat_lock);
spin_unlock (info-lock);
up(info-sem);
 }
@@ -257,7 +234,6 @@
entry = 

Re: unused shared memory is written into core dump - bug or feature?

2001-06-12 Thread Christoph Rohland

Hi Peter,

On Tue, 12 Jun 2001, Peter Niemayer wrote:
> I just noticed that when I attach some SYSV shared memory segments
> to my process and then that process dies from a SIGSEGV that _all_
> the shared memory is dumped into the core file, even if it was never
> used and therefore didn't show up in any of the memory statistics.

Fixed in recent kernel versions (2.2 and 2.4). It will create sparse
files and not touch the unused address space.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: unused shared memory is written into core dump - bug or feature?

2001-06-12 Thread Christoph Rohland

Hi Peter,

On Tue, 12 Jun 2001, Peter Niemayer wrote:
> I just noticed that when I attach some SYSV shared memory segments
> to my process and then that process dies from a SIGSEGV that _all_
> the shared memory is dumped into the core file, even if it was never
> used and therefore didn't show up in any of the memory statistics.

Fixed in recent kernel versions (2.2 and 2.4). It will create sparse
files and not touch the unused address space.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs + sendfile bug ?

2001-05-22 Thread Christoph Rohland

Hi Linus,

On Mon, 21 May 2001, Linus Torvalds wrote:
> In article <[EMAIL PROTECTED]>, Christoph Rohland
> <[EMAIL PROTECTED]> wrote:
>>
>>tmpfs does not provide the necessary functions for sendfile and lo:
>>readpage, prepare_write and commitwrite.
>>
>>And I do not see a way how to provide readpage in tmpfs :-(
> 
> Why not just do it the same way ramfs does?
> 
> If you don't have any backing store, you know that the page is
> empty. If you _do_ have backing store, a readpage() won't be
> called. Ergo:

AFAIU readpage is fine as long as there is no backing store. But if
the page is in the swap cache, the lookup of the page in the page
cache will fail; generic_file_read, loop, sendfile will allocate a
page and call readpage with that. Now readpage has to copy the swap
cache page over to this page :-(

IMHO Copying on swapin is really not worth the additional
functionality.

Did I miss something?

Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs + sendfile bug ?

2001-05-22 Thread Christoph Rohland

Hi Linus,

On Mon, 21 May 2001, Linus Torvalds wrote:
> In article <[EMAIL PROTECTED]>, Christoph Rohland
> <[EMAIL PROTECTED]> wrote:
>>
>>tmpfs does not provide the necessary functions for sendfile and lo:
>>readpage, prepare_write and commitwrite.
>>
>>And I do not see a way how to provide readpage in tmpfs :-(
> 
> Why not just do it the same way ramfs does?
> 
> If you don't have any backing store, you know that the page is
> empty. If you _do_ have backing store, a readpage() won't be
> called. Ergo:

AFAIU readpage is fine as long as there is no backing store. But if
the page is in the swap cache, the lookup of the page in the page
cache will fail; generic_file_read, loop, sendfile will allocate a
page and call readpage with that. Now readpage has to copy the swap
cache page over to this page :-(

IMHO Copying on swapin is really not worth the additional
functionality.

Did I miss something?

Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs + sendfile bug ?

2001-05-21 Thread Christoph Rohland

Hi Pierre,

On Mon, 21 May 2001, Pierre Etchemaite wrote:
> I just found a problem GETting a file stored in tmpfs using proftpd;
> I always get a "426 Transfer aborted. Data connection closed."
> 
> That could be a bug with tmpfs and sendfile in 2.4.5-pre4 :
> 
> [...]
> read(8, "%PDF-1.4\r%\342\343\317\323\r\n870 0 obj\r<< \r/L"..., 8192) = 8192
> shmat(11, 0x4cfe65, 0x3)= 0xb4d4
> sendfile(11, 8, [0], 5045861)   = -1 EINVAL (Invalid argument)
> [...]
> 
> Any idea ?

That's probably the same reason why tmpfs and loopback do not work
together:

tmpfs does not provide the necessary functions for sendfile and lo:
readpage, prepare_write and commitwrite.

And I do not see a way how to provide readpage in tmpfs :-(

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs + sendfile bug ?

2001-05-21 Thread Christoph Rohland

Hi Pierre,

On Mon, 21 May 2001, Pierre Etchemaite wrote:
> I just found a problem GETting a file stored in tmpfs using proftpd;
> I always get a "426 Transfer aborted. Data connection closed."
> 
> That could be a bug with tmpfs and sendfile in 2.4.5-pre4 :
> 
> [...]
> read(8, "%PDF-1.4\r%\342\343\317\323\r\n870 0 obj\r<< \r/L"..., 8192) = 8192
> shmat(11, 0x4cfe65, 0x3)= 0xb4d4
> sendfile(11, 8, [0], 5045861)   = -1 EINVAL (Invalid argument)
> [...]
> 
> Any idea ?

That's probably the same reason why tmpfs and loopback do not work
together:

tmpfs does not provide the necessary functions for sendfile and lo:
readpage, prepare_write and commitwrite.

And I do not see a way how to provide readpage in tmpfs :-(

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] tmpfs accounting cleanup for -ac series

2001-05-17 Thread Christoph Rohland

Hi Alan,

While looking at the -ac version of ramfs I noticed that there is a
new address operation introduced which I can use to cleanup shmem.

This patch throws away some magic recalculation and makes the
accounting of shmem accurate.

It also encapsulates all accesses to the superblock_info into a macro.

The patch is on top of my previous ones.

Greetings
Christoph

diff -uNr 4-ac9/fs/proc/proc_misc.c c/fs/proc/proc_misc.c
--- 4-ac9/fs/proc/proc_misc.c   Thu May 17 13:17:37 2001
+++ c/fs/proc/proc_misc.c   Thu May 17 13:11:30 2001
@@ -140,17 +140,9 @@
 {
struct sysinfo i;
int len;
-   unsigned int cached, shmem;
+   unsigned int cached;
 
-   /*
-* There may be some inconsistency because shmem_nrpages
-* update is delayed to page_cache_size
-* We make sure the cached value does not get below zero 
-*/
-   cached = atomic_read(_cache_size);
-   shmem  = atomic_read(_nrpages);
-   if (shmem < cached)
-   cached -= shmem;
+   cached = atomic_read(_cache_size) - atomic_read(_nrpages);
 
 /*
  * display in kilobytes.
diff -uNr 4-ac9/mm/mmap.c c/mm/mmap.c
--- 4-ac9/mm/mmap.c Thu May 17 13:17:37 2001
+++ c/mm/mmap.c Thu May 17 10:54:22 2001
@@ -56,24 +56,14 @@
 */
 
long free;
-   unsigned long cached, shmem;
-
-   /*
-* There may be some inconsistency because shmem_nrpages
-* update is delayed to the page_cache_size
-* We make sure the cached value does not get below zero 
-*/
-   cached = atomic_read(_cache_size);
-   shmem  = atomic_read(_nrpages);
-   if (cached > shmem)
-   cached -= shmem;
 
 /* Sometimes we want to use more memory than we have. */
if (sysctl_overcommit_memory)
return 1;
 
free = atomic_read(_pages);
-   free += cached;
+   free += atomic_read(_cache_size) ;
+   free -= atomic_read(_nrpages);
free += nr_free_pages();
free += nr_swap_pages;
 
diff -uNr 4-ac9/mm/shmem.c c/mm/shmem.c
--- 4-ac9/mm/shmem.cThu May 17 13:17:37 2001
+++ c/mm/shmem.cThu May 17 10:54:03 2001
@@ -35,6 +35,8 @@
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
 
+#define SHMEM_SB(sb) (>u.shmem_sb)
+
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
@@ -50,44 +52,6 @@
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
 /*
- * shmem_recalc_inode - recalculate the size of an inode
- *
- * @inode: inode to recalc
- * @swap:  additional swap pages freed externally
- *
- * We have to calculate the free blocks since the mm can drop pages
- * behind our back
- *
- * But we know that normally
- * inodes->i_blocks/BLOCKS_PER_PAGE == 
- * inode->i_mapping->nrpages + info->swapped
- *
- * So the mm freed 
- * inodes->i_blocks/BLOCKS_PER_PAGE - 
- * (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
- *
- * The swap parameter is a performance hack for truncate.
- */
-
-static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
-{
-   unsigned long freed;
-
-   freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
-   (inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
-   if (freed){
-   struct shmem_sb_info * info = >i_sb->u.shmem_sb;
-   inode->i_blocks -= freed*BLOCKS_PER_PAGE;
-   spin_lock (>stat_lock);
-   info->free_blocks += freed;
-   spin_unlock (>stat_lock);
-   atomic_sub(freed-swap, _nrpages);
-   }
-}
-
-/*
  * shmem_swp_entry - find the swap vector position in the info structure
  *
  * @info:  info structure for the inode
@@ -318,6 +282,7 @@
unsigned long index;
unsigned long freed = 0;
struct shmem_inode_info * info = SHMEM_I(inode);
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
 
down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -328,14 +293,28 @@
freed += shmem_truncate_indirect(info, index);
 
info->swapped -= freed;
-   shmem_recalc_inode(inode, freed);
+   spin_lock(>stat_lock);
+   sbinfo->free_blocks += freed;
+   spin_unlock(>stat_lock);
spin_unlock (>lock);
up(>sem);
 }
 
+static void shmem_truncatepage(struct page *page)
+{
+   struct inode *inode = (struct inode *)page->mapping->host;
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
+
+   inode->i_blocks -= BLOCKS_PER_PAGE;
+   spin_lock (>stat_lock);
+   sbinfo->free_blocks++;
+   spin_unlock (>stat_lock);
+   atomic_dec(_nrpages);
+}
+
 static void shmem_delete_inode(struct inode * inode)
 {
-   struct shmem_sb_info *info = >i_sb->u.shmem_sb;
+   struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
inode->i_size = 

[Patch] ramfs accounting in -ac broken

2001-05-17 Thread Christoph Rohland

Hi Alan,

The ramfs accounting is broken for shared mmaps. It simply does not
recognize the pages allocated by writing into a shared mapping but
takes them into account when freed.

The attached patch should fix that.

Greetings
Christoph

--- 4-ac9/fs/ramfs/inode.c  Thu May 17 16:51:57 2001
+++ u4ac9/fs/ramfs/inode.c  Thu May 17 14:47:48 2001
@@ -163,9 +163,6 @@
struct ramfs_sb_info *rsb = RAMFS_SB(inode->i_sb);
int ret = 1;
 
-   if (PageDirty(page)) /* It's already been allocated */
-   return 1;
-
lock_rsb(rsb);

if ( (rsb->free_pages > 0) &&
@@ -185,8 +182,7 @@
 {
struct ramfs_sb_info *rsb = RAMFS_SB(inode->i_sb);
 
-   if (! PageDirty(page)) /* The page was never allocated 
- this can happen if it was only read */
+   if (! Page_Uptodate(page))
return;
 
lock_rsb(rsb);
@@ -241,6 +237,8 @@
 static int ramfs_readpage(struct file *file, struct page * page)
 {
if (!Page_Uptodate(page)) {
+   if (!ramfs_alloc_page(file->f_dentry->d_inode, page))
+   return -ENOSPC;
memset(kmap(page), 0, PAGE_CACHE_SIZE);
kunmap(page);
flush_dcache_page(page);
@@ -266,11 +264,12 @@
struct inode *inode = (struct inode *)page->mapping->host;
void *addr;

-   if (! ramfs_alloc_page(inode, page))
-   return -ENOSPC;
-
addr = (void *) kmap(page);
if (!Page_Uptodate(page)) {
+   if (! ramfs_alloc_page(inode, page)) {
+   kunmap(page);
+   return -ENOSPC;
+   }
memset(addr, 0, PAGE_CACHE_SIZE);
flush_dcache_page(page);
SetPageUptodate(page);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-17 Thread Christoph Rohland

Hi Alan,

On Thu, 17 May 2001, Alan Cox wrote:
> I think you have a major tool problem.
> 
> bash-2.04$ size mm/shmem.o
>text  data bss dec hex filename
>7422   572   079941f3a mm/shmem.o
> bash-2.04$ size fs/ramfs/ramfs.o 
>text  data bss dec hex filename
>3185   368   03553 de1 fs/ramfs/ramfs.o
> 
> Never trust ls -l size for binaries, its very very unrelated.
> 
> So ramfs is 3553 bytes, shmem.o in total is 8K on current -ac.

But you cannot disable shmem.o totally. That's my whole point in the
discussion. Why add something that is mostly included in the kernel
already?

You have to compare shmem with tmpfs against shmem w/o it:

   textdata bss dec hex filename
   3398 376   03774 ebe fs/ramfs/ramfs.o
   5150 484   056341602 mm/shmem.o
   9174 636   098102652 mm/shmem.o+tmpfs

So tmpfs is 400 Bytes bigger than ramfs. 

If you add the correct timestamp handling the difference will go down
further. And we gain functionality, don't we?

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-17 Thread Christoph Rohland

Hi Alan,

On Thu, 17 May 2001, Alan Cox wrote:
> I think you have a major tool problem.
> 
> bash-2.04$ size mm/shmem.o
>text  data bss dec hex filename
>7422   572   079941f3a mm/shmem.o
> bash-2.04$ size fs/ramfs/ramfs.o 
>text  data bss dec hex filename
>3185   368   03553 de1 fs/ramfs/ramfs.o
> 
> Never trust ls -l size for binaries, its very very unrelated.
> 
> So ramfs is 3553 bytes, shmem.o in total is 8K on current -ac.

But you cannot disable shmem.o totally. That's my whole point in the
discussion. Why add something that is mostly included in the kernel
already?

You have to compare shmem with tmpfs against shmem w/o it:

   textdata bss dec hex filename
   3398 376   03774 ebe fs/ramfs/ramfs.o
   5150 484   056341602 mm/shmem.o
   9174 636   098102652 mm/shmem.o+tmpfs

So tmpfs is 400 Bytes bigger than ramfs. 

If you add the correct timestamp handling the difference will go down
further. And we gain functionality, don't we?

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] ramfs accounting in -ac broken

2001-05-17 Thread Christoph Rohland

Hi Alan,

The ramfs accounting is broken for shared mmaps. It simply does not
recognize the pages allocated by writing into a shared mapping but
takes them into account when freed.

The attached patch should fix that.

Greetings
Christoph

--- 4-ac9/fs/ramfs/inode.c  Thu May 17 16:51:57 2001
+++ u4ac9/fs/ramfs/inode.c  Thu May 17 14:47:48 2001
@@ -163,9 +163,6 @@
struct ramfs_sb_info *rsb = RAMFS_SB(inode-i_sb);
int ret = 1;
 
-   if (PageDirty(page)) /* It's already been allocated */
-   return 1;
-
lock_rsb(rsb);

if ( (rsb-free_pages  0) 
@@ -185,8 +182,7 @@
 {
struct ramfs_sb_info *rsb = RAMFS_SB(inode-i_sb);
 
-   if (! PageDirty(page)) /* The page was never allocated 
- this can happen if it was only read */
+   if (! Page_Uptodate(page))
return;
 
lock_rsb(rsb);
@@ -241,6 +237,8 @@
 static int ramfs_readpage(struct file *file, struct page * page)
 {
if (!Page_Uptodate(page)) {
+   if (!ramfs_alloc_page(file-f_dentry-d_inode, page))
+   return -ENOSPC;
memset(kmap(page), 0, PAGE_CACHE_SIZE);
kunmap(page);
flush_dcache_page(page);
@@ -266,11 +264,12 @@
struct inode *inode = (struct inode *)page-mapping-host;
void *addr;

-   if (! ramfs_alloc_page(inode, page))
-   return -ENOSPC;
-
addr = (void *) kmap(page);
if (!Page_Uptodate(page)) {
+   if (! ramfs_alloc_page(inode, page)) {
+   kunmap(page);
+   return -ENOSPC;
+   }
memset(addr, 0, PAGE_CACHE_SIZE);
flush_dcache_page(page);
SetPageUptodate(page);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] tmpfs accounting cleanup for -ac series

2001-05-17 Thread Christoph Rohland

Hi Alan,

While looking at the -ac version of ramfs I noticed that there is a
new address operation introduced which I can use to cleanup shmem.

This patch throws away some magic recalculation and makes the
accounting of shmem accurate.

It also encapsulates all accesses to the superblock_info into a macro.

The patch is on top of my previous ones.

Greetings
Christoph

diff -uNr 4-ac9/fs/proc/proc_misc.c c/fs/proc/proc_misc.c
--- 4-ac9/fs/proc/proc_misc.c   Thu May 17 13:17:37 2001
+++ c/fs/proc/proc_misc.c   Thu May 17 13:11:30 2001
@@ -140,17 +140,9 @@
 {
struct sysinfo i;
int len;
-   unsigned int cached, shmem;
+   unsigned int cached;
 
-   /*
-* There may be some inconsistency because shmem_nrpages
-* update is delayed to page_cache_size
-* We make sure the cached value does not get below zero 
-*/
-   cached = atomic_read(page_cache_size);
-   shmem  = atomic_read(shmem_nrpages);
-   if (shmem  cached)
-   cached -= shmem;
+   cached = atomic_read(page_cache_size) - atomic_read(shmem_nrpages);
 
 /*
  * display in kilobytes.
diff -uNr 4-ac9/mm/mmap.c c/mm/mmap.c
--- 4-ac9/mm/mmap.c Thu May 17 13:17:37 2001
+++ c/mm/mmap.c Thu May 17 10:54:22 2001
@@ -56,24 +56,14 @@
 */
 
long free;
-   unsigned long cached, shmem;
-
-   /*
-* There may be some inconsistency because shmem_nrpages
-* update is delayed to the page_cache_size
-* We make sure the cached value does not get below zero 
-*/
-   cached = atomic_read(page_cache_size);
-   shmem  = atomic_read(shmem_nrpages);
-   if (cached  shmem)
-   cached -= shmem;
 
 /* Sometimes we want to use more memory than we have. */
if (sysctl_overcommit_memory)
return 1;
 
free = atomic_read(buffermem_pages);
-   free += cached;
+   free += atomic_read(page_cache_size) ;
+   free -= atomic_read(shmem_nrpages);
free += nr_free_pages();
free += nr_swap_pages;
 
diff -uNr 4-ac9/mm/shmem.c c/mm/shmem.c
--- 4-ac9/mm/shmem.cThu May 17 13:17:37 2001
+++ c/mm/shmem.cThu May 17 10:54:03 2001
@@ -35,6 +35,8 @@
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
 
+#define SHMEM_SB(sb) (sb-u.shmem_sb)
+
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
@@ -50,44 +52,6 @@
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
 /*
- * shmem_recalc_inode - recalculate the size of an inode
- *
- * @inode: inode to recalc
- * @swap:  additional swap pages freed externally
- *
- * We have to calculate the free blocks since the mm can drop pages
- * behind our back
- *
- * But we know that normally
- * inodes-i_blocks/BLOCKS_PER_PAGE == 
- * inode-i_mapping-nrpages + info-swapped
- *
- * So the mm freed 
- * inodes-i_blocks/BLOCKS_PER_PAGE - 
- * (inode-i_mapping-nrpages + info-swapped)
- *
- * It has to be called with the spinlock held.
- *
- * The swap parameter is a performance hack for truncate.
- */
-
-static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
-{
-   unsigned long freed;
-
-   freed = (inode-i_blocks/BLOCKS_PER_PAGE) -
-   (inode-i_mapping-nrpages + SHMEM_I(inode)-swapped);
-   if (freed){
-   struct shmem_sb_info * info = inode-i_sb-u.shmem_sb;
-   inode-i_blocks -= freed*BLOCKS_PER_PAGE;
-   spin_lock (info-stat_lock);
-   info-free_blocks += freed;
-   spin_unlock (info-stat_lock);
-   atomic_sub(freed-swap, shmem_nrpages);
-   }
-}
-
-/*
  * shmem_swp_entry - find the swap vector position in the info structure
  *
  * @info:  info structure for the inode
@@ -318,6 +282,7 @@
unsigned long index;
unsigned long freed = 0;
struct shmem_inode_info * info = SHMEM_I(inode);
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode-i_sb);
 
down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
@@ -328,14 +293,28 @@
freed += shmem_truncate_indirect(info, index);
 
info-swapped -= freed;
-   shmem_recalc_inode(inode, freed);
+   spin_lock(sbinfo-stat_lock);
+   sbinfo-free_blocks += freed;
+   spin_unlock(sbinfo-stat_lock);
spin_unlock (info-lock);
up(info-sem);
 }
 
+static void shmem_truncatepage(struct page *page)
+{
+   struct inode *inode = (struct inode *)page-mapping-host;
+   struct shmem_sb_info * sbinfo = SHMEM_SB(inode-i_sb);
+
+   inode-i_blocks -= BLOCKS_PER_PAGE;
+   spin_lock (sbinfo-stat_lock);
+   sbinfo-free_blocks++;
+   spin_unlock (sbinfo-stat_lock);
+   atomic_dec(shmem_nrpages);
+}
+
 static void shmem_delete_inode(struct inode * inode)
 {
-   struct shmem_sb_info *info = inode-i_sb-u.shmem_sb;
+

Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Alexander,

On Wed, 16 May 2001, Alexander Viro wrote:
> Because what I need is an absolute minimum. Heck, I don't even use
> regular files (in the full variant of patch, that is). They might
> become useful, but I can live with mkdir() and mknod().

So what about adding shmem_mknod and shmem_mkdir to the core shmem.c
part? They are now under CONFIG_TMPFS but are only ~20 lines of code.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Linus,

On Wed, 16 May 2001, Linus Torvalds wrote:
> 
> On 16 May 2001, Christoph Rohland wrote:
>> 
>> cr:/speicher/src/u4ac9 $ ls -l mm/shmem.o*
>> -rw-r--r--1 cr   users  154652 Mai 16 19:27 mm/shmem.o-tmpfs
>> -rw-r--r--1 cr   users  180764 Mai 16 19:24 mm/shmem.o+tmpfs
>> cr:/speicher/src/u4ac9 $ ls -l fs/ramfs/ramfs.o
>> -rw-r--r--1 cr   users  141452 Mai 16 19:27 fs/ramfs/ramfs.o
>> 
>> So CONFIG_TMPFS adds 26k and ramfs 140k.
> 
> What the hell are you doing? Compiling with debugging or something?

Yep, sorry that was uml with debugging info.

> The ramfs inode.o file (the only file that ramfs contains) has 376
> bytes of data and 1612 bytes of code. BYTES. The whole final object
> file with all the relocation information is
> 
>   -rw-r--r-- 1 torvalds eng 5734 May 16 10:58 ramfs.o
> 
> but out of that 5.5kB, only 2kB are actually linked into the kernel
> and are used to _run_.

-rw-r--r--1 root root 8656 May 16 20:27 fs/ramfs/ramfs.o
-rw-r--r--1 root root11688 May 16 20:24 mm/shmem.o-tmpfs
-rw-r--r--1 root root18592 May 16 20:20 mm/shmem.o+tmpfs

That's an -ac kernel, so ramfs does accounting and is a little bigger
than yours.

So the read/write support in tmpfs is about the same size as ramfs.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Linus,

On Wed, 16 May 2001, Linus Torvalds wrote:
> Looks ok, but it also feels like 2.5.x stuff to me. 
> 
> Also, there's the question of whether to make ramfs just built-in,
> or make _tmpfs_ built in - ramfs is certainly simpler, but tmpfs
> does the same things and you need that one for shared mappings etc.
> 
> Comments?

cr:/speicher/src/u4ac9 $ ls -l mm/shmem.o*
-rw-r--r--1 cr   users  154652 Mai 16 19:27 mm/shmem.o-tmpfs
-rw-r--r--1 cr   users  180764 Mai 16 19:24 mm/shmem.o+tmpfs
cr:/speicher/src/u4ac9 $ ls -l fs/ramfs/ramfs.o
-rw-r--r--1 cr   users  141452 Mai 16 19:27 fs/ramfs/ramfs.o

So CONFIG_TMPFS adds 26k and ramfs 140k.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Al,

On Wed, 16 May 2001, Alexander Viro wrote:
>   One point that might be better done differently - since we
> need ramfs for boot I've just made fs/Config.in declare CONFIG_RAMFS
> as define_bool CONFIG_RAMFS y. If ramfs grows (e.g. gets resource
> limits patches from -ac) we might be better off doing a minimal
> variant permanently in kernel (calling it rootfs) and making
> ramfs use rootfs methods. It's completely separate issue, so I've
> done it the simplest way for the time being.

Why do you use ramfs? Most of it is duplicated in tmpfs and ramfs is a
minimal _example_ fs. There was some agreement that this should stay
so.

Look into mm/shmem.c and look how little is added by CONFIG_TMPFS and
how much is duplicated from ramfs

If we really think the added swap vector per file in tmpfs is a major
overhead we should add the nonswapping functions there.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Al,

On Wed, 16 May 2001, Alexander Viro wrote:
>   One point that might be better done differently - since we
> need ramfs for boot I've just made fs/Config.in declare CONFIG_RAMFS
> as define_bool CONFIG_RAMFS y. If ramfs grows (e.g. gets resource
> limits patches from -ac) we might be better off doing a minimal
> variant permanently in kernel (calling it rootfs) and making
> ramfs use rootfs methods. It's completely separate issue, so I've
> done it the simplest way for the time being.

Why do you use ramfs? Most of it is duplicated in tmpfs and ramfs is a
minimal _example_ fs. There was some agreement that this should stay
so.

Look into mm/shmem.c and look how little is added by CONFIG_TMPFS and
how much is duplicated from ramfs

If we really think the added swap vector per file in tmpfs is a major
overhead we should add the nonswapping functions there.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Linus,

On Wed, 16 May 2001, Linus Torvalds wrote:
> Looks ok, but it also feels like 2.5.x stuff to me. 
> 
> Also, there's the question of whether to make ramfs just built-in,
> or make _tmpfs_ built in - ramfs is certainly simpler, but tmpfs
> does the same things and you need that one for shared mappings etc.
> 
> Comments?

cr:/speicher/src/u4ac9 $ ls -l mm/shmem.o*
-rw-r--r--1 cr   users  154652 Mai 16 19:27 mm/shmem.o-tmpfs
-rw-r--r--1 cr   users  180764 Mai 16 19:24 mm/shmem.o+tmpfs
cr:/speicher/src/u4ac9 $ ls -l fs/ramfs/ramfs.o
-rw-r--r--1 cr   users  141452 Mai 16 19:27 fs/ramfs/ramfs.o

So CONFIG_TMPFS adds 26k and ramfs 140k.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Linus,

On Wed, 16 May 2001, Linus Torvalds wrote:
 
> On 16 May 2001, Christoph Rohland wrote:
>> 
>> cr:/speicher/src/u4ac9 $ ls -l mm/shmem.o*
>> -rw-r--r--1 cr   users  154652 Mai 16 19:27 mm/shmem.o-tmpfs
>> -rw-r--r--1 cr   users  180764 Mai 16 19:24 mm/shmem.o+tmpfs
>> cr:/speicher/src/u4ac9 $ ls -l fs/ramfs/ramfs.o
>> -rw-r--r--1 cr   users  141452 Mai 16 19:27 fs/ramfs/ramfs.o
>> 
>> So CONFIG_TMPFS adds 26k and ramfs 140k.
> 
> What the hell are you doing? Compiling with debugging or something?

Yep, sorry that was uml with debugging info.

> The ramfs inode.o file (the only file that ramfs contains) has 376
> bytes of data and 1612 bytes of code. BYTES. The whole final object
> file with all the relocation information is
> 
>   -rw-r--r-- 1 torvalds eng 5734 May 16 10:58 ramfs.o
> 
> but out of that 5.5kB, only 2kB are actually linked into the kernel
> and are used to _run_.

-rw-r--r--1 root root 8656 May 16 20:27 fs/ramfs/ramfs.o
-rw-r--r--1 root root11688 May 16 20:24 mm/shmem.o-tmpfs
-rw-r--r--1 root root18592 May 16 20:20 mm/shmem.o+tmpfs

That's an -ac kernel, so ramfs does accounting and is a little bigger
than yours.

So the read/write support in tmpfs is about the same size as ramfs.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [PATCH] rootfs (part 1)

2001-05-16 Thread Christoph Rohland

Hi Alexander,

On Wed, 16 May 2001, Alexander Viro wrote:
> Because what I need is an absolute minimum. Heck, I don't even use
> regular files (in the full variant of patch, that is). They might
> become useful, but I can live with mkdir() and mknod().

So what about adding shmem_mknod and shmem_mkdir to the core shmem.c
part? They are now under CONFIG_TMPFS but are only ~20 lines of code.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] allow tmpfs bigger than 1GB on s390x

2001-05-15 Thread Christoph Rohland

Hi Martin,

Here is the patch which implements triple indirect blocks in
tmpfs. 

For the rest of the world: This is needed since s390x is a 64 Bit
platform with pagesize of 4k :-(

It is on top of my other tmpfs fixes which you can find at
ftp://ftp.sap.com/pub/linuxlab/people/cr

Greetings
Christoph



diff -uNr 4-mSsas/include/linux/shmem_fs.h 4-mSsasb/include/linux/shmem_fs.h
--- 4-mSsas/include/linux/shmem_fs.hMon May 14 08:49:42 2001
+++ 4-mSsasb/include/linux/shmem_fs.h   Mon May 14 09:05:39 2001
@@ -22,9 +22,9 @@
 struct shmem_inode_info {
spinlock_t  lock;
struct semaphoresem;
-   unsigned long   max_index;
+   unsigned long   next_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
-   swp_entry_t   **i_indirect; /* doubly indirect blocks */
+   void  **i_indirect; /* indirect blocks */
unsigned long   swapped;
int locked; /* into memory */
struct list_headlist;
diff -uNr 4-mSsas/mm/shmem.c 4-mSsasb/mm/shmem.c
--- 4-mSsas/mm/shmem.c  Mon May 14 08:49:42 2001
+++ 4-mSsasb/mm/shmem.c Tue May 15 09:12:00 2001
@@ -34,7 +34,6 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
-#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
 
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
@@ -65,7 +64,7 @@
  *
  * So the mm freed 
  * inodes->i_blocks/BLOCKS_PER_PAGE - 
- * (inode->i_mapping->nrpages + info->swapped)
+ * (inode->i_mapping->nrpages + info->swapped)
  *
  * It has to be called with the spinlock held.
  *
@@ -88,9 +87,53 @@
}
 }
 
-static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long 
index) 
+/*
+ * shmem_swp_entry - find the swap vector position in the info structure
+ *
+ * @info:  info structure for the inode
+ * @index: index of the page to find
+ * @page:  optional page to add to the structure. Has to be preset to
+ * all zeros
+ *
+ * If there is no space allocated yet it will return -ENOMEM when
+ * page == 0 else it will use the page for the needed block.
+ *
+ * returns -EFBIG if the index is too big.
+ *
+ *
+ * The swap vector is organized the following way:
+ *
+ * There are SHMEM_NR_DIRECT entries directly stored in the
+ * shmem_inode_info structure. So small files do not need an addional
+ * allocation.
+ *
+ * For pages with index > SHMEM_NR_DIRECT there is the pointer
+ * i_indirect which points to a page which holds in the first half
+ * doubly indirect blocks, in the second half triple indirect blocks:
+ *
+ * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
+ * following layout (for SHMEM_NR_DIRECT == 16):
+ *
+ * i_indirect -> dir --> 16-19
+ *   |  +-> 20-23
+ *   |
+ *   +-->dir2 --> 24-27
+ *   |+-> 28-31
+ *   |+-> 32-35
+ *   |+-> 36-39
+ *   |
+ *   +-->dir3 --> 40-43
+ *+-> 44-47
+ *+-> 48-51
+ *+-> 52-55
+ */
+
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * 
+ENTRIES_PER_PAGE/2*(ENTRIES_PER_PAGE+1))
+
+static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long 
+index, unsigned long page) 
 {
unsigned long offset;
+   void **dir;
 
if (index < SHMEM_NR_DIRECT)
return info->i_direct+index;
@@ -99,23 +142,66 @@
offset = index % ENTRIES_PER_PAGE;
index /= ENTRIES_PER_PAGE;
 
-   if (index >= ENTRIES_PER_PAGE)
-   return ERR_PTR(-EFBIG);
-
if (!info->i_indirect) {
-   info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
-   if (!info->i_indirect)
+   info->i_indirect = (void *) page;
+   return ERR_PTR(-ENOMEM);
+   }
+
+   dir = info->i_indirect + index;
+   if (index >= ENTRIES_PER_PAGE/2) {
+   index -= ENTRIES_PER_PAGE/2;
+   dir = info->i_indirect + ENTRIES_PER_PAGE/2 
+   + index/ENTRIES_PER_PAGE;
+   index %= ENTRIES_PER_PAGE;
+
+   if(!*dir) {
+   *dir = (void *) page;
+   /* We return since we will need another page
+   in the next step */
return ERR_PTR(-ENOMEM);
+   }
+   dir = ((void **)*dir) + index;
}
-   if(!(info->i_indirect[index])) {
-   info->i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
-   if (!info->i_indirect[index])
+   if (!*dir) {
+   if (!page)
return ERR_PTR(-ENOMEM);
+   *dir = (void *)page;
}
-   
-   return 

Assorted tmpfs fixes

2001-05-15 Thread Christoph Rohland
struct sysinfo i;
int len;
+   unsigned int cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(_cache_size);
+   shmem  = atomic_read(_nrpages);
+   if (shmem < cached)
+   cached -= shmem;
 
 /*
  * display in kilobytes.
@@ -153,8 +164,8 @@
 "Swap: %8lu %8lu %8lu\n",
 B(i.totalram), B(i.totalram-i.freeram), B(i.freeram),
 B(i.sharedram), B(i.bufferram),
-B(atomic_read(_cache_size)), B(i.totalswap),
-B(i.totalswap-i.freeswap), B(i.freeswap));
+   B(cached), B(i.totalswap),
+   B(i.totalswap-i.freeswap), B(i.freeswap));
 /*
  * Tagged format, for easy grepping and expansion.
  * The above will go away eventually, once the tools
@@ -180,7 +191,7 @@
 K(i.freeram),
 K(i.sharedram),
 K(i.bufferram),
-K(atomic_read(_cache_size)),
+   K(cached),
K(nr_active_pages),
K(nr_inactive_dirty_pages),
K(nr_inactive_clean_pages()),
diff -uNr 2.4.4-mSsu/include/linux/shmem_fs.h 2.4.4-mSsua/include/linux/shmem_fs.h
--- 2.4.4-mSsu/include/linux/shmem_fs.h Wed May  2 18:36:05 2001
+++ 2.4.4-mSsua/include/linux/shmem_fs.hMon May  7 12:52:00 2001
@@ -17,6 +17,8 @@
unsigned long val;
 } swp_entry_t;
 
+extern atomic_t shmem_nrpages;
+
 struct shmem_inode_info {
spinlock_t  lock;
struct semaphoresem;
diff -uNr 2.4.4-mSsu/mm/mmap.c 2.4.4-mSsua/mm/mmap.c
--- 2.4.4-mSsu/mm/mmap.cSun Apr 29 20:33:01 2001
+++ 2.4.4-mSsua/mm/mmap.c   Mon May  7 13:42:03 2001
@@ -55,13 +55,24 @@
 */
 
long free;
-   
+   unsigned long cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to the page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(_cache_size);
+   shmem  = atomic_read(_nrpages);
+   if (cached > shmem)
+   cached -= shmem;
+
 /* Sometimes we want to use more memory than we have. */
if (sysctl_overcommit_memory)
return 1;
 
free = atomic_read(_pages);
-   free += atomic_read(_cache_size);
+   free += cached;
    free += nr_free_pages();
free += nr_swap_pages;
 
diff -uNr 2.4.4-mSsu/mm/shmem.c 2.4.4-mSsua/mm/shmem.c
--- 2.4.4-mSsu/mm/shmem.c   Fri May  4 21:37:34 2001
+++ 2.4.4-mSsua/mm/shmem.c  Mon May  7 11:13:27 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -45,6 +46,7 @@
 
 LIST_HEAD (shmem_inodes);
 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
+atomic_t shmem_nrpages = ATOMIC_INIT(0);
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
@@ -52,6 +54,7 @@
  * shmem_recalc_inode - recalculate the size of an inode
  *
  * @inode: inode to recalc
+ * @swap:  additional swap pages freed externally
  *
  * We have to calculate the free blocks since the mm can drop pages
  * behind our back
@@ -62,12 +65,14 @@
  *
  * So the mm freed 
  * inodes->i_blocks/BLOCKS_PER_PAGE - 
- * (inode->i_mapping->nrpages + info->swapped)
+ * (inode->i_mapping->nrpages + info->swapped)
  *
  * It has to be called with the spinlock held.
+ *
+ * The swap parameter is a performance hack for truncate.
  */
 
-static void shmem_recalc_inode(struct inode * inode)
+static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
 {
unsigned long freed;
 
@@ -79,6 +84,7 @@
spin_lock (>stat_lock);
info->free_blocks += freed;
spin_unlock (>stat_lock);
+   atomic_sub(freed-swap, _nrpages);
}
 }
 
@@ -195,7 +201,7 @@
 out:
info->max_index = index;
info->swapped -= freed;
-   shmem_recalc_inode(inode);
+   shmem_recalc_inode(inode, freed);
spin_unlock (>lock);
up(>sem);
 }
@@ -250,14 +256,15 @@
entry = shmem_swp_entry(info, page->index);
if (IS_ERR(entry))  /* this had been allocted on page allocation */
BUG();
-   shmem_recalc_inode(page->mapping->host);
+   shmem_recalc_inode(page->mapping->host, 0);
error = -EAGAIN;
if (entry->val)
BUG();
 
*entry = swap;
error = 0;
-   /* Remove the from the page cache */
+   /* Remove the page from the page 

Assorted tmpfs fixes

2001-05-15 Thread Christoph Rohland
, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(page_cache_size);
+   shmem  = atomic_read(shmem_nrpages);
+   if (shmem  cached)
+   cached -= shmem;
 
 /*
  * display in kilobytes.
@@ -153,8 +164,8 @@
 Swap: %8lu %8lu %8lu\n,
 B(i.totalram), B(i.totalram-i.freeram), B(i.freeram),
 B(i.sharedram), B(i.bufferram),
-B(atomic_read(page_cache_size)), B(i.totalswap),
-B(i.totalswap-i.freeswap), B(i.freeswap));
+   B(cached), B(i.totalswap),
+   B(i.totalswap-i.freeswap), B(i.freeswap));
 /*
  * Tagged format, for easy grepping and expansion.
  * The above will go away eventually, once the tools
@@ -180,7 +191,7 @@
 K(i.freeram),
 K(i.sharedram),
 K(i.bufferram),
-K(atomic_read(page_cache_size)),
+   K(cached),
K(nr_active_pages),
K(nr_inactive_dirty_pages),
K(nr_inactive_clean_pages()),
diff -uNr 2.4.4-mSsu/include/linux/shmem_fs.h 2.4.4-mSsua/include/linux/shmem_fs.h
--- 2.4.4-mSsu/include/linux/shmem_fs.h Wed May  2 18:36:05 2001
+++ 2.4.4-mSsua/include/linux/shmem_fs.hMon May  7 12:52:00 2001
@@ -17,6 +17,8 @@
unsigned long val;
 } swp_entry_t;
 
+extern atomic_t shmem_nrpages;
+
 struct shmem_inode_info {
spinlock_t  lock;
struct semaphoresem;
diff -uNr 2.4.4-mSsu/mm/mmap.c 2.4.4-mSsua/mm/mmap.c
--- 2.4.4-mSsu/mm/mmap.cSun Apr 29 20:33:01 2001
+++ 2.4.4-mSsua/mm/mmap.c   Mon May  7 13:42:03 2001
@@ -55,13 +55,24 @@
 */
 
long free;
-   
+   unsigned long cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to the page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(page_cache_size);
+   shmem  = atomic_read(shmem_nrpages);
+   if (cached  shmem)
+   cached -= shmem;
+
 /* Sometimes we want to use more memory than we have. */
if (sysctl_overcommit_memory)
return 1;
 
free = atomic_read(buffermem_pages);
-   free += atomic_read(page_cache_size);
+   free += cached;
free += nr_free_pages();
free += nr_swap_pages;
 
diff -uNr 2.4.4-mSsu/mm/shmem.c 2.4.4-mSsua/mm/shmem.c
--- 2.4.4-mSsu/mm/shmem.c   Fri May  4 21:37:34 2001
+++ 2.4.4-mSsua/mm/shmem.c  Mon May  7 11:13:27 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -45,6 +46,7 @@
 
 LIST_HEAD (shmem_inodes);
 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
+atomic_t shmem_nrpages = ATOMIC_INIT(0);
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
@@ -52,6 +54,7 @@
  * shmem_recalc_inode - recalculate the size of an inode
  *
  * @inode: inode to recalc
+ * @swap:  additional swap pages freed externally
  *
  * We have to calculate the free blocks since the mm can drop pages
  * behind our back
@@ -62,12 +65,14 @@
  *
  * So the mm freed 
  * inodes-i_blocks/BLOCKS_PER_PAGE - 
- * (inode-i_mapping-nrpages + info-swapped)
+ * (inode-i_mapping-nrpages + info-swapped)
  *
  * It has to be called with the spinlock held.
+ *
+ * The swap parameter is a performance hack for truncate.
  */
 
-static void shmem_recalc_inode(struct inode * inode)
+static void shmem_recalc_inode(struct inode * inode, unsigned long swap)
 {
unsigned long freed;
 
@@ -79,6 +84,7 @@
spin_lock (info-stat_lock);
info-free_blocks += freed;
spin_unlock (info-stat_lock);
+   atomic_sub(freed-swap, shmem_nrpages);
}
 }
 
@@ -195,7 +201,7 @@
 out:
info-max_index = index;
info-swapped -= freed;
-   shmem_recalc_inode(inode);
+   shmem_recalc_inode(inode, freed);
spin_unlock (info-lock);
up(info-sem);
 }
@@ -250,14 +256,15 @@
entry = shmem_swp_entry(info, page-index);
if (IS_ERR(entry))  /* this had been allocted on page allocation */
BUG();
-   shmem_recalc_inode(page-mapping-host);
+   shmem_recalc_inode(page-mapping-host, 0);
error = -EAGAIN;
if (entry-val)
BUG();
 
*entry = swap;
error = 0;
-   /* Remove the from the page cache */
+   /* Remove the page from the page cache */
+   atomic_dec(shmem_nrpages);
lru_cache_del(page

[Patch] allow tmpfs bigger than 1GB on s390x

2001-05-15 Thread Christoph Rohland

Hi Martin,

Here is the patch which implements triple indirect blocks in
tmpfs. 

For the rest of the world: This is needed since s390x is a 64 Bit
platform with pagesize of 4k :-(

It is on top of my other tmpfs fixes which you can find at
ftp://ftp.sap.com/pub/linuxlab/people/cr

Greetings
Christoph



diff -uNr 4-mSsas/include/linux/shmem_fs.h 4-mSsasb/include/linux/shmem_fs.h
--- 4-mSsas/include/linux/shmem_fs.hMon May 14 08:49:42 2001
+++ 4-mSsasb/include/linux/shmem_fs.h   Mon May 14 09:05:39 2001
@@ -22,9 +22,9 @@
 struct shmem_inode_info {
spinlock_t  lock;
struct semaphoresem;
-   unsigned long   max_index;
+   unsigned long   next_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
-   swp_entry_t   **i_indirect; /* doubly indirect blocks */
+   void  **i_indirect; /* indirect blocks */
unsigned long   swapped;
int locked; /* into memory */
struct list_headlist;
diff -uNr 4-mSsas/mm/shmem.c 4-mSsasb/mm/shmem.c
--- 4-mSsas/mm/shmem.c  Mon May 14 08:49:42 2001
+++ 4-mSsasb/mm/shmem.c Tue May 15 09:12:00 2001
@@ -34,7 +34,6 @@
 #define TMPFS_MAGIC0x01021994
 
 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
-#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
 
 static struct super_operations shmem_ops;
 static struct address_space_operations shmem_aops;
@@ -65,7 +64,7 @@
  *
  * So the mm freed 
  * inodes-i_blocks/BLOCKS_PER_PAGE - 
- * (inode-i_mapping-nrpages + info-swapped)
+ * (inode-i_mapping-nrpages + info-swapped)
  *
  * It has to be called with the spinlock held.
  *
@@ -88,9 +87,53 @@
}
 }
 
-static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long 
index) 
+/*
+ * shmem_swp_entry - find the swap vector position in the info structure
+ *
+ * @info:  info structure for the inode
+ * @index: index of the page to find
+ * @page:  optional page to add to the structure. Has to be preset to
+ * all zeros
+ *
+ * If there is no space allocated yet it will return -ENOMEM when
+ * page == 0 else it will use the page for the needed block.
+ *
+ * returns -EFBIG if the index is too big.
+ *
+ *
+ * The swap vector is organized the following way:
+ *
+ * There are SHMEM_NR_DIRECT entries directly stored in the
+ * shmem_inode_info structure. So small files do not need an additional
+ * allocation.
+ *
+ * For pages with index > SHMEM_NR_DIRECT there is the pointer
+ * i_indirect which points to a page which holds in the first half
+ * doubly indirect blocks, in the second half triple indirect blocks:
+ *
+ * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
+ * following layout (for SHMEM_NR_DIRECT == 16):
+ *
+ * i_indirect - dir -- 16-19
+ *   |  +- 20-23
+ *   |
+ *   +--dir2 -- 24-27
+ *   |+- 28-31
+ *   |+- 32-35
+ *   |+- 36-39
+ *   |
+ *   +--dir3 -- 40-43
+ *+- 44-47
+ *+- 48-51
+ *+- 52-55
+ */
+
+#define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * 
+ENTRIES_PER_PAGE/2*(ENTRIES_PER_PAGE+1))
+
+static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long 
+index, unsigned long page) 
 {
unsigned long offset;
+   void **dir;
 
if (index  SHMEM_NR_DIRECT)
return info-i_direct+index;
@@ -99,23 +142,66 @@
offset = index % ENTRIES_PER_PAGE;
index /= ENTRIES_PER_PAGE;
 
-   if (index = ENTRIES_PER_PAGE)
-   return ERR_PTR(-EFBIG);
-
if (!info-i_indirect) {
-   info-i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
-   if (!info-i_indirect)
+   info-i_indirect = (void *) page;
+   return ERR_PTR(-ENOMEM);
+   }
+
+   dir = info-i_indirect + index;
+   if (index = ENTRIES_PER_PAGE/2) {
+   index -= ENTRIES_PER_PAGE/2;
+   dir = info-i_indirect + ENTRIES_PER_PAGE/2 
+   + index/ENTRIES_PER_PAGE;
+   index %= ENTRIES_PER_PAGE;
+
+   if(!*dir) {
+   *dir = (void *) page;
+   /* We return since we will need another page
+   in the next step */
return ERR_PTR(-ENOMEM);
+   }
+   dir = ((void **)*dir) + index;
}
-   if(!(info-i_indirect[index])) {
-   info-i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
-   if (!info-i_indirect[index])
+   if (!*dir) {
+   if (!page)
return ERR_PTR(-ENOMEM);
+   *dir = (void *)page;
}
-   
-   return info-i_indirect[index]+offset;
+   

Re: 2.4.4 kernel freeze for unknown reason

2001-05-13 Thread Christoph Rohland

Hi Mike,

On Sat, 12 May 2001, Mike Galbraith wrote:
> Why do I not see this behavior with a heavy swap throughput test
> load?  It seems decidedly odd to me that swapspace should remain
> allocated on other folks lightly loaded boxen given that my heavily
> loaded box does release swapspace quite regularly.  What am I
> missing?

Are you using a database or something other which mostly uses shared
mem/tmpfs? This does reclaim swap space on swap in.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: 2.4.4 kernel freeze for unknown reason

2001-05-13 Thread Christoph Rohland

Hi Mike,

On Sat, 12 May 2001, Mike Galbraith wrote:
> Why do I not see this behavior with a heavy swap throughput test
> load?  It seems decidedly odd to me that swapspace should remain
> allocated on other folks lightly loaded boxen given that my heavily
> loaded box does release swapspace quite regularly.  What am I
> missing?

Are you using a database or something other which mostly uses shared
mem/tmpfs? This does reclaim swap space on swap in.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] Do not account shmem pages to the page cache

2001-05-07 Thread Christoph Rohland

Hi,

The appended patch does its own accounting of shmem pages and adjusts
the page cache size to take these into account. So now again you will
see shmem pages as used in top/vmstat etc. This confused a lot of
people.

There is an uncertainty in the calculations since the vm may drop pages
behind shmem and the number of shmem pages is estimated too high. This
especially happens on truncate because first the page cache is reduced
and later the shmem readjusts its count.

To prevent negative cache sizes the adjustment is only done if
shmem_nrpages < page_cache_size.

The latter part of the patch (all the init.c files) also exports the
shmem page number to the shared memory field in meminfo. This means a
change in semantics of this field but apparently a lot of people
interpret this field exactly this way and it was not used any more

The patches are on top of my encapsulation patch.

Greetings
Christoph

diff -uNr 2.4.4-mSsu/fs/proc/proc_misc.c 2.4.4-mSsua/fs/proc/proc_misc.c
--- 2.4.4-mSsu/fs/proc/proc_misc.c  Sun Apr 29 20:32:52 2001
+++ 2.4.4-mSsua/fs/proc/proc_misc.c Mon May  7 13:38:53 2001
@@ -140,6 +140,17 @@
 {
struct sysinfo i;
int len;
+   unsigned int cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(_cache_size);
+   shmem  = atomic_read(_nrpages);
+   if (shmem < cached)
+   cached -= shmem;
 
 /*
  * display in kilobytes.
@@ -153,8 +164,8 @@
 "Swap: %8lu %8lu %8lu\n",
 B(i.totalram), B(i.totalram-i.freeram), B(i.freeram),
 B(i.sharedram), B(i.bufferram),
-B(atomic_read(_cache_size)), B(i.totalswap),
-B(i.totalswap-i.freeswap), B(i.freeswap));
+   B(cached), B(i.totalswap),
+   B(i.totalswap-i.freeswap), B(i.freeswap));
 /*
  * Tagged format, for easy grepping and expansion.
  * The above will go away eventually, once the tools
@@ -180,7 +191,7 @@
 K(i.freeram),
 K(i.sharedram),
 K(i.bufferram),
-K(atomic_read(_cache_size)),
+   K(cached),
K(nr_active_pages),
K(nr_inactive_dirty_pages),
K(nr_inactive_clean_pages()),
diff -uNr 2.4.4-mSsu/include/linux/shmem_fs.h 2.4.4-mSsua/include/linux/shmem_fs.h
--- 2.4.4-mSsu/include/linux/shmem_fs.h Wed May  2 18:36:05 2001
+++ 2.4.4-mSsua/include/linux/shmem_fs.hMon May  7 12:52:00 2001
@@ -17,6 +17,8 @@
unsigned long val;
 } swp_entry_t;
 
+extern atomic_t shmem_nrpages;
+
 struct shmem_inode_info {
spinlock_t  lock;
struct semaphoresem;
diff -uNr 2.4.4-mSsu/mm/mmap.c 2.4.4-mSsua/mm/mmap.c
--- 2.4.4-mSsu/mm/mmap.cSun Apr 29 20:33:01 2001
+++ 2.4.4-mSsua/mm/mmap.c   Mon May  7 13:42:03 2001
@@ -55,13 +55,24 @@
 */
 
long free;
-   
+   unsigned long cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to the page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(_cache_size);
+   shmem  = atomic_read(_nrpages);
+   if (cached > shmem)
+   cached -= shmem;
+
 /* Sometimes we want to use more memory than we have. */
if (sysctl_overcommit_memory)
return 1;
 
free = atomic_read(_pages);
-   free += atomic_read(_cache_size);
+   free += cached;
free += nr_free_pages();
free += nr_swap_pages;
 
diff -uNr 2.4.4-mSsu/mm/shmem.c 2.4.4-mSsua/mm/shmem.c
--- 2.4.4-mSsu/mm/shmem.c   Fri May  4 21:37:34 2001
+++ 2.4.4-mSsua/mm/shmem.c  Mon May  7 11:13:27 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -45,6 +46,7 @@
 
 LIST_HEAD (shmem_inodes);
 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
+atomic_t shmem_nrpages = ATOMIC_INIT(0);
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
@@ -52,6 +54,7 @@
  * shmem_recalc_inode - recalculate the size of an inode
  *
  * @inode: inode to recalc
+ * @swap:  additional swap pages freed externally
  *
  * We have to calculate the free blocks since the mm can drop pages
  * behind our back
@@ -62,12 +65,14 @@
  *
  * So the mm freed 
  * inodes->i_blocks/BLOCKS_PER_PAGE - 
- * (inode->i_mapping->nrpages + info->swapped)
+ * (inode->i_mapping->nrpages + info->swapped)
  *
  * It has to be called with the s

[Patch] Do not account shmem pages to the page cache

2001-05-07 Thread Christoph Rohland

Hi,

The appended patch does its own accounting of shmem pages and adjusts
the page cache size to take these into account. So now again you will
see shmem pages as used in top/vmstat etc. This confused a lot of
people.

There is an uncertainty in the calculations since the vm may drop pages
behind shmem and the number of shmem pages is estimated too high. This
especially happens on truncate because first the page cache is reduced
and later the shmem readjusts its count.

To prevent negative cache sizes the adjustment is only done if
shmem_nrpages < page_cache_size.

The latter part of the patch (all the init.c files) also exports the
shmem page number to the shared memory field in meminfo. This means a
change in semantics of this field but apparently a lot of people
interpret this field exactly this way and it was not used any more

The patches are on top of my encapsulation patch.

Greetings
Christoph

diff -uNr 2.4.4-mSsu/fs/proc/proc_misc.c 2.4.4-mSsua/fs/proc/proc_misc.c
--- 2.4.4-mSsu/fs/proc/proc_misc.c  Sun Apr 29 20:32:52 2001
+++ 2.4.4-mSsua/fs/proc/proc_misc.c Mon May  7 13:38:53 2001
@@ -140,6 +140,17 @@
 {
struct sysinfo i;
int len;
+   unsigned int cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(page_cache_size);
+   shmem  = atomic_read(shmem_nrpages);
+   if (shmem  cached)
+   cached -= shmem;
 
 /*
  * display in kilobytes.
@@ -153,8 +164,8 @@
 Swap: %8lu %8lu %8lu\n,
 B(i.totalram), B(i.totalram-i.freeram), B(i.freeram),
 B(i.sharedram), B(i.bufferram),
-B(atomic_read(page_cache_size)), B(i.totalswap),
-B(i.totalswap-i.freeswap), B(i.freeswap));
+   B(cached), B(i.totalswap),
+   B(i.totalswap-i.freeswap), B(i.freeswap));
 /*
  * Tagged format, for easy grepping and expansion.
  * The above will go away eventually, once the tools
@@ -180,7 +191,7 @@
 K(i.freeram),
 K(i.sharedram),
 K(i.bufferram),
-K(atomic_read(page_cache_size)),
+   K(cached),
K(nr_active_pages),
K(nr_inactive_dirty_pages),
K(nr_inactive_clean_pages()),
diff -uNr 2.4.4-mSsu/include/linux/shmem_fs.h 2.4.4-mSsua/include/linux/shmem_fs.h
--- 2.4.4-mSsu/include/linux/shmem_fs.h Wed May  2 18:36:05 2001
+++ 2.4.4-mSsua/include/linux/shmem_fs.hMon May  7 12:52:00 2001
@@ -17,6 +17,8 @@
unsigned long val;
 } swp_entry_t;
 
+extern atomic_t shmem_nrpages;
+
 struct shmem_inode_info {
spinlock_t  lock;
struct semaphoresem;
diff -uNr 2.4.4-mSsu/mm/mmap.c 2.4.4-mSsua/mm/mmap.c
--- 2.4.4-mSsu/mm/mmap.cSun Apr 29 20:33:01 2001
+++ 2.4.4-mSsua/mm/mmap.c   Mon May  7 13:42:03 2001
@@ -55,13 +55,24 @@
 */
 
long free;
-   
+   unsigned long cached, shmem;
+
+   /*
+* There may be some inconsistency because shmem_nrpages
+* update is delayed to the page_cache_size
+* We make sure the cached value does not get below zero 
+*/
+   cached = atomic_read(page_cache_size);
+   shmem  = atomic_read(shmem_nrpages);
+   if (cached  shmem)
+   cached -= shmem;
+
 /* Sometimes we want to use more memory than we have. */
if (sysctl_overcommit_memory)
return 1;
 
free = atomic_read(buffermem_pages);
-   free += atomic_read(page_cache_size);
+   free += cached;
free += nr_free_pages();
free += nr_swap_pages;
 
diff -uNr 2.4.4-mSsu/mm/shmem.c 2.4.4-mSsua/mm/shmem.c
--- 2.4.4-mSsu/mm/shmem.c   Fri May  4 21:37:34 2001
+++ 2.4.4-mSsua/mm/shmem.c  Mon May  7 11:13:27 2001
@@ -3,7 +3,8 @@
  *
  * Copyright (C) 2000 Linus Torvalds.
  *  2000 Transmeta Corp.
- *  2000 Christoph Rohland
+ *  2000-2001 Christoph Rohland
+ *  2000-2001 SAP AG
  * 
  * This file is released under the GPL.
  */
@@ -45,6 +46,7 @@
 
 LIST_HEAD (shmem_inodes);
 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
+atomic_t shmem_nrpages = ATOMIC_INIT(0);
 
 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
 
@@ -52,6 +54,7 @@
  * shmem_recalc_inode - recalculate the size of an inode
  *
  * @inode: inode to recalc
+ * @swap:  additional swap pages freed externally
  *
  * We have to calculate the free blocks since the mm can drop pages
  * behind our back
@@ -62,12 +65,14 @@
  *
  * So the mm freed 
  * inodes-i_blocks/BLOCKS_PER_PAGE - 
- * (inode-i_mapping-nrpages + info-swapped)
+ * (inode-i_mapping-nrpages + info-swapped)
  *
  * It has to be called with the spinlock held

[Resend] Collection of tmpfs patches

2001-05-06 Thread Christoph Rohland

Hi,

There is some confusion about my latest tmpfs fixes. There were three
patches which are cumulative against 2.4.4:

1) deadlock fix for write out of mmap regions. (AFAIK this is
   integrated in the -ac kernels)
2) encapsulate access to shmem_inode_info
3) Do inline symlinks

I attach all these patches to this mail in the case that somebody
missed one.

Greetings
Christoph



diff -uNr 2.4.4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 2.4.4/include/linux/shmem_fs.h  Sun Apr 29 20:33:00 2001
+++ c/include/linux/shmem_fs.h  Sun Apr 29 22:43:56 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 2.4.4/mm/shmem.c c/mm/shmem.c
--- 2.4.4/mm/shmem.cMon Apr 30 09:45:39 2001
+++ c/mm/shmem.cTue May  1 15:15:38 2001
@@ -161,6 +161,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = >u.shmem_i;
 
+   down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (>lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -197,6 +198,7 @@
info->swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (>lock);
+   up(>sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +283,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode->i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = >u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -393,6 +392,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode->i_mapping;
int error;
 
@@ -407,27 +407,28 @@
page_cache_release(*ptr);
}
 
-   down (>i_sem);
-   /* retest we may have slept */
+   info = >u.shmem_i;
+   down (>sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(>u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (>i_sem);
+   up (>sem);
return 0;
 failed:
-   up (>i_sem);
+   up (>sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (>i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -500,6 +501,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (>u.shmem_sb.stat_lock);
if (!sb->u.shmem_sb.free_inodes) {
@@ -519,7 +521,9 @@
inode->i_rdev = to_kdev_t(dev);
inode->i_mapping->a_ops = _aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-   spin_lock_init (>u.shmem_i.lock);
+   info = >u.shmem_i;
+   spin_lock_init (>lock);
+   sema_init (>sem, 1);
switch (mode & S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -549,6 +553,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
struct inode*inode = file->f_dentry->d_inode; 
+   struct shmem_inode_info *info;
unsigned long   limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
loff_t  pos;
struct page *page;
@@ -624,7 +629,11 @@
__get_user(dummy, buf+bytes-1);
}
 
-   page = shmem_getpage_locked(inode, index);
+   info = >u.shmem_i;
+   down (>sem);
+   page = shmem_getpage_locked(info, inode, index);
+   up (>sem);
+
status = PTR_ERR(page);
if (IS_ERR(page))
break;
@@ -635,7 +644,6 @@
}
 
 

[Resend] Collection of tmpfs patches

2001-05-06 Thread Christoph Rohland

Hi,

There is some confusion about my latest tmpfs fixes. There were three
patches which are cumulative against 2.4.4:

1) deadlock fix for write out of mmap regions. (AFAIK this is
   integrated in the -ac kernels)
2) encapsulate access to shmem_inode_info
3) Do inline symlinks

I attach all these patches to this mail in the case that somebody
missed one.

Greetings
Christoph



diff -uNr 2.4.4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 2.4.4/include/linux/shmem_fs.h  Sun Apr 29 20:33:00 2001
+++ c/include/linux/shmem_fs.h  Sun Apr 29 22:43:56 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 2.4.4/mm/shmem.c c/mm/shmem.c
--- 2.4.4/mm/shmem.cMon Apr 30 09:45:39 2001
+++ c/mm/shmem.cTue May  1 15:15:38 2001
@@ -161,6 +161,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = inode-u.shmem_i;
 
+   down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
spin_lock (info-lock);
index = (inode-i_size + PAGE_CACHE_SIZE - 1)  PAGE_CACHE_SHIFT;
@@ -197,6 +198,7 @@
info-swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (info-lock);
+   up(info-sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +283,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode-i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = inode-u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -393,6 +392,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode-i_mapping;
int error;
 
@@ -407,27 +407,28 @@
page_cache_release(*ptr);
}
 
-   down (inode-i_sem);
-   /* retest we may have slept */
+   info = inode-u.shmem_i;
+   down (info-sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode-i_size  (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(inode-u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (inode-i_sem);
+   up (info-sem);
return 0;
 failed:
-   up (inode-i_sem);
+   up (info-sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (inode-i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -500,6 +501,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (sb-u.shmem_sb.stat_lock);
if (!sb-u.shmem_sb.free_inodes) {
@@ -519,7 +521,9 @@
inode-i_rdev = to_kdev_t(dev);
inode-i_mapping-a_ops = shmem_aops;
inode-i_atime = inode-i_mtime = inode-i_ctime = CURRENT_TIME;
-   spin_lock_init (inode-u.shmem_i.lock);
+   info = inode-u.shmem_i;
+   spin_lock_init (info-lock);
+   sema_init (info-sem, 1);
switch (mode  S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -549,6 +553,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
struct inode*inode = file-f_dentry-d_inode; 
+   struct shmem_inode_info *info;
unsigned long   limit = current-rlim[RLIMIT_FSIZE].rlim_cur;
loff_t  pos;
struct page *page;
@@ -624,7 +629,11 @@
__get_user(dummy, buf+bytes-1);
}
 
-   page = shmem_getpage_locked(inode, index);
+   info = inode-u.shmem_i;
+   down (info-sem);
+   page = shmem_getpage_locked(info, inode, index);
+   up (info-sem);
+
status = PTR_ERR(page);
if 

[Patch] inline symlinks for tmpfs

2001-05-05 Thread Christoph Rohland

Hi David,

On Tue, 24 Apr 2001, David L. Parsley wrote:
>> OK I will do that for tmpfs soon. And I will do the symlink
>> inlining with that patch.

OK, here comes the patch for the symlink inlining. It is on top of my
previous patch to encapsulate access to the private inode info.

Greetings
Christoph



diff -uNr 2.4.4-mmap_write-SHMEM_I/mm/shmem.c 
2.4.4-mmap_write-SHMEM_I-symlink/mm/shmem.c
--- 2.4.4-mmap_write-SHMEM_I/mm/shmem.c Fri May  4 21:32:22 2001
+++ 2.4.4-mmap_write-SHMEM_I-symlink/mm/shmem.c Fri May  4 21:37:34 2001
@@ -41,7 +41,6 @@
 static struct inode_operations shmem_inode_operations;
 static struct file_operations shmem_dir_operations;
 static struct inode_operations shmem_dir_inode_operations;
-static struct inode_operations shmem_symlink_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
 LIST_HEAD (shmem_inodes);
@@ -205,11 +204,13 @@
 {
struct shmem_sb_info *info = >i_sb->u.shmem_sb;
 
-   spin_lock (_ilock);
-   list_del (_I(inode)->list);
-   spin_unlock (_ilock);
inode->i_size = 0;
-   shmem_truncate (inode);
+   if (inode->i_op->truncate == shmem_truncate){ 
+   spin_lock (_ilock);
+   list_del (_I(inode)->list);
+   spin_unlock (_ilock);
+   shmem_truncate(inode);
+   }
spin_lock (>stat_lock);
info->free_inodes++;
spin_unlock (>stat_lock);
@@ -532,6 +533,9 @@
case S_IFREG:
inode->i_op = _inode_operations;
inode->i_fop = _file_operations;
+   spin_lock (_ilock);
+   list_add (_I(inode)->list, _inodes);
+   spin_unlock (_ilock);
break;
case S_IFDIR:
inode->i_nlink++;
@@ -539,17 +543,17 @@
inode->i_fop = _dir_operations;
break;
case S_IFLNK:
-   inode->i_op = _symlink_inode_operations;
break;
}
-   spin_lock (_ilock);
-   list_add (_I(inode)->list, _inodes);
-   spin_unlock (_ilock);
}
return inode;
 }
 
 #ifdef CONFIG_TMPFS
+
+static struct inode_operations shmem_symlink_inode_operations;
+static struct inode_operations shmem_symlink_inline_operations;
+
 static ssize_t
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
@@ -930,33 +934,54 @@
struct inode *inode;
struct page *page;
char *kaddr;
+   struct shmem_inode_info * info;
 
error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
if (error)
return error;
 
-   len = strlen(symname);
+   len = strlen(symname) + 1;
if (len > PAGE_SIZE)
return -ENAMETOOLONG;
-   
+
inode = dentry->d_inode;
-   down(>i_sem);
-   page = shmem_getpage_locked(SHMEM_I(inode), inode, 0);
-   if (IS_ERR(page))
-   goto fail;
-   kaddr = kmap(page);
-   memcpy(kaddr, symname, len);
-   kunmap(page);
+   info = SHMEM_I(inode);
inode->i_size = len;
-   SetPageDirty(page);
-   UnlockPage(page);
-   page_cache_release(page);
-   up(>i_sem);
+   if (len <= sizeof(struct shmem_inode_info)) {
+   /* do it inline */
+   memcpy(info, symname, len);
+   inode->i_op = _symlink_inline_operations;
+   } else {
+   spin_lock (_ilock);
+   list_add (>list, _inodes);
+   spin_unlock (_ilock);
+   down(>i_sem);
+   page = shmem_getpage_locked(info, inode, 0);
+   if (IS_ERR(page)) {
+   up(>i_sem);
+   return PTR_ERR(page);
+   }
+   kaddr = kmap(page);
+   memcpy(kaddr, symname, len);
+   kunmap(page);
+   SetPageDirty(page);
+   UnlockPage(page);
+   page_cache_release(page);
+   up(>i_sem);
+   inode->i_op = _symlink_inode_operations;
+   }
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
return 0;
-fail:
-   up(>i_sem);
-   return PTR_ERR(page);
+}
+
+static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
+{
+   return vfs_readlink(dentry,buffer,buflen, (const char 
+*)SHMEM_I(dentry->d_inode));
+}
+
+static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+{
+   return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
 }
 
 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
@@ -986,6 +1011,17 @@
return res;
 }
 
+static struct inode_operations shmem_symlink_inline_operations = {
+   readlink:   shmem_readlink_inline,
+   follow_link:shmem_follow_link_inline,

[Patch] inline symlinks for tmpfs

2001-05-05 Thread Christoph Rohland

Hi David,

On Tue, 24 Apr 2001, David L. Parsley wrote:
>> OK I will do that for tmpfs soon. And I will do the symlink
>> inlining with that patch.

OK, here comes the patch for the symlink inlining. It is on top of my
previous patch to encapsulate access to the private inode info.

Greetings
Christoph



diff -uNr 2.4.4-mmap_write-SHMEM_I/mm/shmem.c 
2.4.4-mmap_write-SHMEM_I-symlink/mm/shmem.c
--- 2.4.4-mmap_write-SHMEM_I/mm/shmem.c Fri May  4 21:32:22 2001
+++ 2.4.4-mmap_write-SHMEM_I-symlink/mm/shmem.c Fri May  4 21:37:34 2001
@@ -41,7 +41,6 @@
 static struct inode_operations shmem_inode_operations;
 static struct file_operations shmem_dir_operations;
 static struct inode_operations shmem_dir_inode_operations;
-static struct inode_operations shmem_symlink_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
 LIST_HEAD (shmem_inodes);
@@ -205,11 +204,13 @@
 {
struct shmem_sb_info *info = inode-i_sb-u.shmem_sb;
 
-   spin_lock (shmem_ilock);
-   list_del (SHMEM_I(inode)-list);
-   spin_unlock (shmem_ilock);
inode-i_size = 0;
-   shmem_truncate (inode);
+   if (inode-i_op-truncate == shmem_truncate){ 
+   spin_lock (shmem_ilock);
+   list_del (SHMEM_I(inode)-list);
+   spin_unlock (shmem_ilock);
+   shmem_truncate(inode);
+   }
spin_lock (info-stat_lock);
info-free_inodes++;
spin_unlock (info-stat_lock);
@@ -532,6 +533,9 @@
case S_IFREG:
inode-i_op = shmem_inode_operations;
inode-i_fop = shmem_file_operations;
+   spin_lock (shmem_ilock);
+   list_add (SHMEM_I(inode)-list, shmem_inodes);
+   spin_unlock (shmem_ilock);
break;
case S_IFDIR:
inode-i_nlink++;
@@ -539,17 +543,17 @@
inode-i_fop = shmem_dir_operations;
break;
case S_IFLNK:
-   inode-i_op = shmem_symlink_inode_operations;
break;
}
-   spin_lock (shmem_ilock);
-   list_add (SHMEM_I(inode)-list, shmem_inodes);
-   spin_unlock (shmem_ilock);
}
return inode;
 }
 
 #ifdef CONFIG_TMPFS
+
+static struct inode_operations shmem_symlink_inode_operations;
+static struct inode_operations shmem_symlink_inline_operations;
+
 static ssize_t
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
@@ -930,33 +934,54 @@
struct inode *inode;
struct page *page;
char *kaddr;
+   struct shmem_inode_info * info;
 
error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
if (error)
return error;
 
-   len = strlen(symname);
+   len = strlen(symname) + 1;
if (len  PAGE_SIZE)
return -ENAMETOOLONG;
-   
+
inode = dentry-d_inode;
-   down(inode-i_sem);
-   page = shmem_getpage_locked(SHMEM_I(inode), inode, 0);
-   if (IS_ERR(page))
-   goto fail;
-   kaddr = kmap(page);
-   memcpy(kaddr, symname, len);
-   kunmap(page);
+   info = SHMEM_I(inode);
inode-i_size = len;
-   SetPageDirty(page);
-   UnlockPage(page);
-   page_cache_release(page);
-   up(inode-i_sem);
+   if (len = sizeof(struct shmem_inode_info)) {
+   /* do it inline */
+   memcpy(info, symname, len);
+   inode-i_op = shmem_symlink_inline_operations;
+   } else {
+   spin_lock (shmem_ilock);
+   list_add (info-list, shmem_inodes);
+   spin_unlock (shmem_ilock);
+   down(inode-i_sem);
+   page = shmem_getpage_locked(info, inode, 0);
+   if (IS_ERR(page)) {
+   up(inode-i_sem);
+   return PTR_ERR(page);
+   }
+   kaddr = kmap(page);
+   memcpy(kaddr, symname, len);
+   kunmap(page);
+   SetPageDirty(page);
+   UnlockPage(page);
+   page_cache_release(page);
+   up(inode-i_sem);
+   inode-i_op = shmem_symlink_inode_operations;
+   }
dir-i_ctime = dir-i_mtime = CURRENT_TIME;
return 0;
-fail:
-   up(inode-i_sem);
-   return PTR_ERR(page);
+}
+
+static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
+{
+   return vfs_readlink(dentry,buffer,buflen, (const char 
+*)SHMEM_I(dentry-d_inode));
+}
+
+static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+{
+   return vfs_follow_link(nd, (const char *)SHMEM_I(dentry-d_inode));
 }
 
 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
@@ -986,6 +1011,17 @@
return res;
 }
 
+static struct 

[Patch] encapsulate shmem access to shmem_inode_info

2001-05-04 Thread Christoph Rohland

Hi,

On 24 Apr 2001, Christoph Rohland wrote:
> Hi Al,
> 
> On Tue, 24 Apr 2001, Alexander Viro wrote:
>> So yes, IMO having such patches available _is_ a good thing. And in
>> 2.5 we definitely want them in the tree. If encapsulation part gets
>> there during 2.4 and separate allocation is available for all of
>> them it will be easier to do without PITA in process.
> 
> OK I will do that for tmpfs soon. And I will do the symlink inlining
> with that patch.

Here comes the patch to encapsulate all accesses to struct
shmem_inode_info into a macro. It is now trivial to allocate the
private part independently from the inode.

Greetings
Christoph

P.S: The symlink inlining will come in a separate patch

diff -uNr 2.4.4-mmap_write/include/linux/shmem_fs.h 
2.4.4-mmap_write-SHMEM_I/include/linux/shmem_fs.h
--- 2.4.4-mmap_write/include/linux/shmem_fs.h   Tue May  1 20:02:00 2001
+++ 2.4.4-mmap_write-SHMEM_I/include/linux/shmem_fs.h   Tue May  1 20:06:10 2001
@@ -18,14 +18,15 @@
 } swp_entry_t;
 
 struct shmem_inode_info {
-   spinlock_t  lock;
-   struct semaphore sem;
-   unsigned long   max_index;
-   swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
-   swp_entry_t   **i_indirect; /* doubly indirect blocks */
-   unsigned long   swapped;
-   int locked; /* into memory */
+   spinlock_t  lock;
+   struct semaphoresem;
+   unsigned long   max_index;
+   swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
+   swp_entry_t   **i_indirect; /* doubly indirect blocks */
+   unsigned long   swapped;
+   int locked; /* into memory */
struct list_headlist;
+   struct inode   *inode;
 };
 
 struct shmem_sb_info {
@@ -35,5 +36,7 @@
unsigned long free_inodes;  /* How many are left for allocation */
spinlock_tstat_lock;
 };
+
+#define SHMEM_I(inode)  (>u.shmem_i)
 
 #endif
diff -uNr 2.4.4-mmap_write/ipc/shm.c 2.4.4-mmap_write-SHMEM_I/ipc/shm.c
--- 2.4.4-mmap_write/ipc/shm.c  Wed Apr 11 12:36:47 2001
+++ 2.4.4-mmap_write-SHMEM_I/ipc/shm.c  Tue May  1 20:06:10 2001
@@ -348,6 +348,7 @@
 
 static void shm_get_stat (unsigned long *rss, unsigned long *swp) 
 {
+   struct shmem_inode_info *info;
int i;
 
*rss = 0;
@@ -361,10 +362,11 @@
if(shp == NULL)
continue;
inode = shp->shm_file->f_dentry->d_inode;
-   spin_lock (>u.shmem_i.lock);
+   info = SHMEM_I(inode);
+   spin_lock (>lock);
*rss += inode->i_mapping->nrpages;
-   *swp += inode->u.shmem_i.swapped;
-   spin_unlock (>u.shmem_i.lock);
+   *swp += info->swapped;
+   spin_unlock (>lock);
}
 }
 
diff -uNr 2.4.4-mmap_write/mm/shmem.c 2.4.4-mmap_write-SHMEM_I/mm/shmem.c
--- 2.4.4-mmap_write/mm/shmem.c Tue May  1 20:02:00 2001
+++ 2.4.4-mmap_write-SHMEM_I/mm/shmem.c Wed May  2 16:46:00 2001
@@ -73,7 +73,7 @@
unsigned long freed;
 
freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
-   (inode->i_mapping->nrpages + inode->u.shmem_i.swapped);
+   (inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
if (freed){
struct shmem_sb_info * info = >i_sb->u.shmem_sb;
inode->i_blocks -= freed*BLOCKS_PER_PAGE;
@@ -159,7 +159,7 @@
unsigned long index, start;
unsigned long freed = 0;
swp_entry_t **base, **ptr, **last;
-   struct shmem_inode_info * info = >u.shmem_i;
+   struct shmem_inode_info * info = SHMEM_I(inode);
 
down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -206,7 +206,7 @@
struct shmem_sb_info *info = >i_sb->u.shmem_sb;
 
spin_lock (_ilock);
-   list_del (>u.shmem_i.list);
+   list_del (_I(inode)->list);
spin_unlock (_ilock);
inode->i_size = 0;
shmem_truncate (inode);
@@ -239,7 +239,7 @@
goto out;

inode = page->mapping->host;
-   info = >u.shmem_i;
+   info = SHMEM_I(inode);
swap = __get_swap_page(2);
error = -ENOMEM;
if (!swap.val)
@@ -407,7 +407,7 @@
page_cache_release(*ptr);
}
 
-   info = >u.shmem_i;
+   info = SHMEM_I(inode);
down (>sem);
/* retest we may have slept */  
 
@@ -415,7 +415,7 @@
if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
goto failed;
 
-   *ptr = shmem_getpage_locked(>u.shmem_i, inode, idx);
+   *ptr = shmem_getpage_locked(info, inode, idx);
if (IS_ERR (*ptr))
goto failed;
 
@@ -462,7 +462,7 @@
 void shmem_lo

Re: tmpfs doesn't update free memory stats?

2001-05-04 Thread Christoph Rohland

Hi Jacek,

On Fri, 4 May 2001, Jacek Kopecky wrote:
>  I'm not in the list, please cc your replies to me.
>  After upgrading to 2.4.4 I started using tmpfs for /tmp and I
> noticed a strange behavior:
> 
>  dd if=/dev/zero of=blah bs=1024 count=102400
>   # increased my used swap space by approx. 100MiB (correct)
>  rm blah
>   # did not decrease it back
> 
>  Multiple retries showed what looked like a random behavior of
> the used swap stats. Is this a correct behavior? Should the swap
> stats be dismissed as 'unreliable'? I expected that when creating
> a 100MiB file in memory it should increase the swap (or memory)
> usage by cca 100MiB and that removing a file from tmpfs means
> freeing the memory.

It will be adjusted under memory pressure. At this time there is no
way to release swap cached pages without the potential of deadlocks.

This is not nice but the only short term solution and should not
affect anything besides stats.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs doesn't update free memory stats?

2001-05-04 Thread Christoph Rohland

Hi Jacek,

On Fri, 4 May 2001, Jacek Kopecky wrote:
>  I'm not in the list, please cc your replies to me.
>  After upgrading to 2.4.4 I started using tmpfs for /tmp and I
> noticed a strange behavior:
> 
>  dd if=/dev/zero of=blah bs=1024 count=102400
>   # increased my used swap space by approx. 100MiB (correct)
>  rm blah
>   # did not decrease it back
> 
>  Multiple retries showed what looked like a random behavior of
> the used swap stats. Is this a correct behavior? Should the swap
> stats be dismissed as 'unreliable'? I expected that when creating
> a 100MiB file in memory it should increase the swap (or memory)
> usage by cca 100MiB and that removing a file from tmpfs means
> freeing the memory.

It will be adjusted under memory pressure. At this time there is no
way to release swap cached pages without the potential of deadlocks.

This is not nice but the only short term solution and should not
affect anything besides stats.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] encapsulate shmem access to shmem_inode_info

2001-05-04 Thread Christoph Rohland

Hi,

On 24 Apr 2001, Christoph Rohland wrote:
> Hi Al,
> 
> On Tue, 24 Apr 2001, Alexander Viro wrote:
>> So yes, IMO having such patches available _is_ a good thing. And in
>> 2.5 we definitely want them in the tree. If encapsulation part gets
>> there during 2.4 and separate allocation is available for all of
>> them it will be easier to do without PITA in process.
> 
> OK I will do that for tmpfs soon. And I will do the symlink inlining
> with that patch.

Here comes the patch to encapsulate all accesses to struct
shmem_inode_info into a macro. It is now trivial to allocate the
private part independently from the inode.

Greetings
Christoph

P.S: The symlink inlining will come in a separate patch

diff -uNr 2.4.4-mmap_write/include/linux/shmem_fs.h 
2.4.4-mmap_write-SHMEM_I/include/linux/shmem_fs.h
--- 2.4.4-mmap_write/include/linux/shmem_fs.h   Tue May  1 20:02:00 2001
+++ 2.4.4-mmap_write-SHMEM_I/include/linux/shmem_fs.h   Tue May  1 20:06:10 2001
@@ -18,14 +18,15 @@
 } swp_entry_t;
 
 struct shmem_inode_info {
-   spinlock_t  lock;
-   struct semaphore sem;
-   unsigned long   max_index;
-   swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
-   swp_entry_t   **i_indirect; /* doubly indirect blocks */
-   unsigned long   swapped;
-   int locked; /* into memory */
+   spinlock_t  lock;
+   struct semaphoresem;
+   unsigned long   max_index;
+   swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
+   swp_entry_t   **i_indirect; /* doubly indirect blocks */
+   unsigned long   swapped;
+   int locked; /* into memory */
struct list_headlist;
+   struct inode   *inode;
 };
 
 struct shmem_sb_info {
@@ -35,5 +36,7 @@
unsigned long free_inodes;  /* How many are left for allocation */
spinlock_tstat_lock;
 };
+
+#define SHMEM_I(inode)  (inode-u.shmem_i)
 
 #endif
diff -uNr 2.4.4-mmap_write/ipc/shm.c 2.4.4-mmap_write-SHMEM_I/ipc/shm.c
--- 2.4.4-mmap_write/ipc/shm.c  Wed Apr 11 12:36:47 2001
+++ 2.4.4-mmap_write-SHMEM_I/ipc/shm.c  Tue May  1 20:06:10 2001
@@ -348,6 +348,7 @@
 
 static void shm_get_stat (unsigned long *rss, unsigned long *swp) 
 {
+   struct shmem_inode_info *info;
int i;
 
*rss = 0;
@@ -361,10 +362,11 @@
if(shp == NULL)
continue;
inode = shp-shm_file-f_dentry-d_inode;
-   spin_lock (inode-u.shmem_i.lock);
+   info = SHMEM_I(inode);
+   spin_lock (info-lock);
*rss += inode-i_mapping-nrpages;
-   *swp += inode-u.shmem_i.swapped;
-   spin_unlock (inode-u.shmem_i.lock);
+   *swp += info-swapped;
+   spin_unlock (info-lock);
}
 }
 
diff -uNr 2.4.4-mmap_write/mm/shmem.c 2.4.4-mmap_write-SHMEM_I/mm/shmem.c
--- 2.4.4-mmap_write/mm/shmem.c Tue May  1 20:02:00 2001
+++ 2.4.4-mmap_write-SHMEM_I/mm/shmem.c Wed May  2 16:46:00 2001
@@ -73,7 +73,7 @@
unsigned long freed;
 
freed = (inode-i_blocks/BLOCKS_PER_PAGE) -
-   (inode-i_mapping-nrpages + inode-u.shmem_i.swapped);
+   (inode-i_mapping-nrpages + SHMEM_I(inode)-swapped);
if (freed){
struct shmem_sb_info * info = inode-i_sb-u.shmem_sb;
inode-i_blocks -= freed*BLOCKS_PER_PAGE;
@@ -159,7 +159,7 @@
unsigned long index, start;
unsigned long freed = 0;
swp_entry_t **base, **ptr, **last;
-   struct shmem_inode_info * info = inode-u.shmem_i;
+   struct shmem_inode_info * info = SHMEM_I(inode);
 
down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
@@ -206,7 +206,7 @@
struct shmem_sb_info *info = inode-i_sb-u.shmem_sb;
 
spin_lock (shmem_ilock);
-   list_del (inode-u.shmem_i.list);
+   list_del (SHMEM_I(inode)-list);
spin_unlock (shmem_ilock);
inode-i_size = 0;
shmem_truncate (inode);
@@ -239,7 +239,7 @@
goto out;

inode = page-mapping-host;
-   info = inode-u.shmem_i;
+   info = SHMEM_I(inode);
swap = __get_swap_page(2);
error = -ENOMEM;
if (!swap.val)
@@ -407,7 +407,7 @@
page_cache_release(*ptr);
}
 
-   info = inode-u.shmem_i;
+   info = SHMEM_I(inode);
down (info-sem);
/* retest we may have slept */  
 
@@ -415,7 +415,7 @@
if (inode-i_size  (loff_t) idx * PAGE_CACHE_SIZE)
goto failed;
 
-   *ptr = shmem_getpage_locked(inode-u.shmem_i, inode, idx);
+   *ptr = shmem_getpage_locked(info, inode, idx);
if (IS_ERR (*ptr))
goto failed;
 
@@ -462,7 +462,7 @@
 void shmem_lock(struct file * file, int lock)
 {
struct inode * inode = file-f_dentry-d_inode;
-   struct shmem_inode_info

Re: [Patch] deadlock on write in tmpfs

2001-05-02 Thread Christoph Rohland

Hi Stephen,

On Tue, 1 May 2001, Stephen C. Tweedie wrote:
> If the locking is for a completely different reason, then a
> different semaphore is quite appropriate.  In this case you're
> trying to lock the shm internal info structures, which is quite
> different from the sort of inode locking which the VFS tries to do
> itself, so the new semaphore appears quite clean --- and definitely
> needed.

It's not the addition to the inode semaphore I do care about, but the
addition to the spin lock which protects also the shmem internals. But
you are probably right: It only protects the on-the-fly pages between
page cache and swap cache.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [Patch] deadlock on write in tmpfs

2001-05-02 Thread Christoph Rohland

Hi Stephen,

On Tue, 1 May 2001, Stephen C. Tweedie wrote:
> If the locking is for a completely different reason, then a
> different semaphore is quite appropriate.  In this case you're
> trying to lock the shm internal info structures, which is quite
> different from the sort of inode locking which the VFS tries to do
> itself, so the new semaphore appears quite clean --- and definitely
> needed.

It's not the addition to the inode semaphore I do care about, but the
addition to the spin lock which protects also the shmem internals. But
you are probably right: It only protects the on-the-fly pages between
page cache and swap cache.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] deadlock on write in tmpfs

2001-05-01 Thread Christoph Rohland

Hi Linus and Stephen,

tmpfs deadlocks when writing into a file from a mapping of the same
file. 

The problem is the following:

- shmem_file_write may call shmem_nopage and later calls
  shmem_getpage_locked,
- shmem_nopage calls shmem_getpage_locked
- shmem_getpage_locked may call shmem_writepage on page allocation

- shmem_file_write holds the inode semaphore
- shmem_getpage_locked prevents races against shmem_writepage with the
  shmem spinlock
- shmem_getpage_locked needs serialization against itself and
  shmem_truncate

The last was done with the inode semaphore, which deadlocks with
shmem_file_write.

So I see two choices: 

1) Do not serialise the whole of shmem_getpage_locked but protect
   critical paths with the spinlock and do retries after sleeps
2) Add another semaphore to serialize shmem_getpage_locked and
   shmem_truncate

I tried some time to get 1) done but the retry logic became way too
complicated. So the attached patch implements 2)

I still think it's ugly to add another semaphore, but it works.

Greetings
Christoph

diff -uNr 2.4.4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 2.4.4/include/linux/shmem_fs.h  Sun Apr 29 20:33:00 2001
+++ c/include/linux/shmem_fs.h  Sun Apr 29 22:43:56 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 2.4.4/mm/shmem.c c/mm/shmem.c
--- 2.4.4/mm/shmem.cMon Apr 30 09:45:39 2001
+++ c/mm/shmem.cTue May  1 15:15:38 2001
@@ -161,6 +161,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = >u.shmem_i;
 
+   down(>sem);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (>lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -197,6 +198,7 @@
info->swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (>lock);
+   up(>sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +283,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode->i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = >u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -393,6 +392,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode->i_mapping;
int error;
 
@@ -407,27 +407,28 @@
page_cache_release(*ptr);
}
 
-   down (>i_sem);
-   /* retest we may have slept */
+   info = >u.shmem_i;
+   down (>sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(>u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (>i_sem);
+   up (>sem);
return 0;
 failed:
-   up (>i_sem);
+   up (>sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (>i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -500,6 +501,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (>u.shmem_sb.stat_lock);
if (!sb->u.shmem_sb.free_inodes) {
@@ -519,7 +521,9 @@
inode->i_rdev = to_kdev_t(dev);
inode->i_mapping->a_ops = _aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-   spin_lock_init (>u.shmem_i.lock);
+   info = >u.shmem_i;
+   spin_lock_init (>lock);
+   sema_init (>sem, 1);
switch (mode & S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -549,6 +553,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
struct inode  

Re: 2.4 and 2GB swap partition limit

2001-05-01 Thread Christoph Rohland

Hi Alan,

On Mon, 30 Apr 2001, Alan Cox wrote:
>> paging in just released 2.4.4, but in previuos kernel, a page that
>> was paged-out, reserves its place in swap even if it is paged-in
>> again, so once you have paged-out all your ram at least once, you
>> can't get any more memory, even if swap is 'empty'.
> 
> This is a bug in the 2.4 VM, nothing more or less. It and the
> horrible bounce buffer bugs are forcing large machines to remain on
> 2.2. So it has to get fixed

Yes, it is a bug, and thanks for stating this so clearly.

But a lot of the big servers can go to 2.4 because SYSV shm/shm
fs/tmpfs will reclaim the swap entries on swapin. So big databases and
applications servers which rely on shm are not affected.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: 2.4 and 2GB swap partition limit

2001-05-01 Thread Christoph Rohland

Hi Alan,

On Mon, 30 Apr 2001, Alan Cox wrote:
>> paging in just released 2.4.4, but in previuos kernel, a page that
>> was paged-out, reserves its place in swap even if it is paged-in
>> again, so once you have paged-out all your ram at least once, you
>> can't get any more memory, even if swap is 'empty'.
> 
> This is a bug in the 2.4 VM, nothing more or less. It and the
> horrible bounce buffer bugs are forcing large machines to remain on
> 2.2. So it has to get fixed

Yes, it is a bug, and thanks for stating this so clearly.

But a lot of the big servers can go to 2.4 because SYSV shm/shm
fs/tmpfs will reclaim the swap entries on swapin. So big databases and
applications servers which rely on shm are not affected.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] deadlock on write in tmpfs

2001-05-01 Thread Christoph Rohland

Hi Linus and Stephen,

tmpfs deadlocks when writing into a file from a mapping of the same
file. 

The problem is the following:

- shmem_file_write may call shmem_nopage and later calls
  shmem_getpage_locked,
- shmem_nopage calls shmem_getpage_locked
- shmem_getpage_locked may call shmem_writepage on page allocation

- shmem_file_write holds the inode semaphore
- shmem_getpage_locked prevents races against shmem_writepage with the
  shmem spinlock
- shmem_getpage_locked needs serialization against itself and
  shmem_truncate

The last was done with the inode semaphore, which deadlocks with
shmem_file_write.

So I see two choices: 

1) Do not serialise the whole of shmem_getpage_locked but protect
   critical paths with the spinlock and do retries after sleeps
2) Add another semaphore to serialize shmem_getpage_locked and
   shmem_truncate

I tried some time to get 1) done but the retry logic became way too
complicated. So the attached patch implements 2)

I still think it's ugly to add another semaphore, but it works.

Greetings
Christoph

diff -uNr 2.4.4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 2.4.4/include/linux/shmem_fs.h  Sun Apr 29 20:33:00 2001
+++ c/include/linux/shmem_fs.h  Sun Apr 29 22:43:56 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
spinlock_t  lock;
+   struct semaphore sem;
unsigned long   max_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 2.4.4/mm/shmem.c c/mm/shmem.c
--- 2.4.4/mm/shmem.cMon Apr 30 09:45:39 2001
+++ c/mm/shmem.cTue May  1 15:15:38 2001
@@ -161,6 +161,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = inode-u.shmem_i;
 
+   down(info-sem);
inode-i_ctime = inode-i_mtime = CURRENT_TIME;
spin_lock (info-lock);
index = (inode-i_size + PAGE_CACHE_SIZE - 1)  PAGE_CACHE_SHIFT;
@@ -197,6 +198,7 @@
info-swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (info-lock);
+   up(info-sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +283,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode 
+* inode, unsigned long idx)
 {
struct address_space * mapping = inode-i_mapping;
-   struct shmem_inode_info *info;
struct page * page;
swp_entry_t *entry;
 
-   info = inode-u.shmem_i;
-
 repeat:
page = find_lock_page(mapping, idx);
if (page)
@@ -393,6 +392,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+   struct shmem_inode_info *info;
struct address_space * mapping = inode-i_mapping;
int error;
 
@@ -407,27 +407,28 @@
page_cache_release(*ptr);
}
 
-   down (inode-i_sem);
-   /* retest we may have slept */
+   info = inode-u.shmem_i;
+   down (info-sem);
+   /* retest we may have slept */  
+
+   *ptr = ERR_PTR(-EFAULT);
if (inode-i_size  (loff_t) idx * PAGE_CACHE_SIZE)
-   goto sigbus;
-   *ptr = shmem_getpage_locked(inode, idx);
+   goto failed;
+
+   *ptr = shmem_getpage_locked(inode-u.shmem_i, inode, idx);
if (IS_ERR (*ptr))
goto failed;
+
UnlockPage(*ptr);
-   up (inode-i_sem);
+   up (info-sem);
return 0;
 failed:
-   up (inode-i_sem);
+   up (info-sem);
error = PTR_ERR(*ptr);
-   *ptr = NOPAGE_OOM;
-   if (error != -EFBIG)
-   *ptr = NOPAGE_SIGBUS;
-   return error;
-sigbus:
-   up (inode-i_sem);
*ptr = NOPAGE_SIGBUS;
-   return -EFAULT;
+   if (error == -ENOMEM)
+   *ptr = NOPAGE_OOM;
+   return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int 
no_share)
@@ -500,6 +501,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
struct inode * inode;
+   struct shmem_inode_info *info;
 
spin_lock (sb-u.shmem_sb.stat_lock);
if (!sb-u.shmem_sb.free_inodes) {
@@ -519,7 +521,9 @@
inode-i_rdev = to_kdev_t(dev);
inode-i_mapping-a_ops = shmem_aops;
inode-i_atime = inode-i_mtime = inode-i_ctime = CURRENT_TIME;
-   spin_lock_init (inode-u.shmem_i.lock);
+   info = inode-u.shmem_i;
+   spin_lock_init (info-lock);
+   sema_init (info-sem, 1);
switch (mode  S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -549,6 +553,7 @@
 shmem_file_write(struct 

Re: ramdisk/tmpfs/ramfs/memfs ?

2001-04-27 Thread Christoph Rohland

Hi Padraig,

On Fri, 27 Apr 2001, Padraig Brady wrote:
> I don't have swap so don't need tmpfs, but could probably
> use it anyway without a backing store? 

Yes, it does not need backing store.

> Anyway why was ramfs created if tmpfs existed, unless tmpfs requires
> backing store?  They both seem to have been written around the same
> time?

- shm fs was written as a specialized fs to implement POSIX shared
  memory based on SYSV shm.
- ramfs was introduced shortly after shm fs and was meant as a
  programming example for a minimal virtual filesystem. 
- Later shm fs was redone to use the same methods as ramfs but still
  was only usable for shared memory.
- After the release of 2.4.0, I extended shm fs to support read/write
  and thus be tmpfs and since then it can replace ramfs.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: ramdisk/tmpfs/ramfs/memfs ?

2001-04-27 Thread Christoph Rohland

On Fri, 27 Apr 2001, [EMAIL PROTECTED] wrote:
>> > tmpfs is basically ramfs with limits.
>> > 
>> 
>> ... and swappable.
>> 
>> -hpa
> 
> Hmmm and what's shmfs? Precedessor of tmpfs?

Yes.

> I even cant remember which one I use for /tmp ;-)

You can mount tmpfs also with type "shm" for compatibility. Type "shm"
will be marked as obsolete in 2.5

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: ramdisk/tmpfs/ramfs/memfs ?

2001-04-27 Thread Christoph Rohland

Hi Padraig,

On Thu, 26 Apr 2001, Padraig Brady wrote:
> 2. Is tmpfs is basically swap and /tmp together in a ramdisk?
>The advantage being you need to reserve less RAM for both
>together than seperately?

tmpfs is ramfs+swap+limits. It is not using ramdisks and is not
related to them.

> 3. If I've no backing store (harddisk?) is there any advantage 
>of using tmpfs instead of ramfs? Also does tmpfs need a 
>backing store?

Probably yes, since you spare a little bit of kernel memory. Most of
tmpfs is unconditionally in the kernel for shared mappings. So the
actual CONFIG_TMPFS only adds some small functions to the kernel to
export this to user space.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: ramdisk/tmpfs/ramfs/memfs ?

2001-04-27 Thread Christoph Rohland

Hi Padraig,

On Thu, 26 Apr 2001, Padraig Brady wrote:
> 2. Is tmpfs is basically swap and /tmp together in a ramdisk?
>The advantage being you need to reserve less RAM for both
>together than seperately?

tmpfs is ramfs+swap+limits. It is not using ramdisks and is not
related to them.

> 3. If I've no backing store (harddisk?) is there any advantage 
>of using tmpfs instead of ramfs? Also does tmpfs need a 
>backing store?

Probably yes, since you spare a little bit kernel memory. most of
tmpfs is unconditionally in the kernel for shared mappings. So the
actual CONFIG_TMPFS only adds some small functions to the kernel to
export this to user space.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: ramdisk/tmpfs/ramfs/memfs ?

2001-04-27 Thread Christoph Rohland

On Fri, 27 Apr 2001, [EMAIL PROTECTED] wrote:
  tmpfs is basically ramfs with limits.
  
 
 ... and swappable.
 
 -hpa
 
 Hmmm and what's shmfs? Precedessor of tmpfs?

Yes.

 I even cant remember which one I use for /tmp ;-)

You can mount tmpfs also with type "shm" for compatibility. Type "shm"
will be marked as obsolete in 2.5

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: ramdisk/tmpfs/ramfs/memfs ?

2001-04-27 Thread Christoph Rohland

Hi Padraig,

On Fri, 27 Apr 2001, Padraig Brady wrote:
 I don't have swap so don't need tmpfs, but could probably
 use it anyway without a backing store? 

Yes, it does not need backing store.

 Anyway why was ramfs created if tmpfs existed, unless tmpfs requires
 backing store?  They both seem to have been written around the same
 time?

- shm fs was written as a specialized fs to implement POSIX shared
  memory based on SYSV shm.
- ramfs was introduced shortly after shm fs and was meant as a
  programming example for a minimal virtual filesystem. 
- Later shm fs was redone to use the same methods like ramfs but still
  was only useable for shared memory.
- After the release of 2.4.0, I extended shm fs to support read/write
  and thus be tmpfs and since then it can replace ramfs.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-25 Thread Christoph Rohland

Hi Andreas,

On Tue, 24 Apr 2001, Andreas Dilger wrote:
> On the other hand, sockets and shmem are both relatively large...

shmem is only large because the union is large. I introduced the
direct swap array of size SHMEM_NR_DIRECT simply to take advantage of
the union. We can decrease SHMEM_NR_DIRECT very easily. I am thinking
about 1 or 5 which would mean that we allocate an indirect block for
files bigger than 4k or 20k respectively.

The shmem_inode_info would then be 8 or 12 words.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: shm_open doesn't work (fix maybe).

2001-04-25 Thread Christoph Rohland

Hi,

On Tue, 24 Apr 2001, Jakub Jelinek wrote:
> On Tue, Apr 24, 2001 at 11:46:20AM -0500, Tom Brusehaver (N-Sysdyne
> Corporation) wrote:
>> 
>> I have been chasing all around trying to find out why
>> shm_open always returns ENOSYS. It is implemented
>> in glibc-2.2.2, and seems the 2.4.3 kernel knows about
>> shmfs.
>> 
>> It seems the file linux/mm/shmem.c has:
>> #define SHMEM_MAGIC 0x01021994
>> 
>> And the glibc-2.2.2/sysdeps/unix/sysv/linux/linux_fsinfo.h has:
>> #define SHMFS_SUPER_MAGIC 0x02011994
>> 
>> Well, which is correct?
> 
> Update your glibc, 2.2.3pre* matches 2.4.x kernel:
> 
> 2001-03-03  Ulrich Drepper  <[EMAIL PROTECTED]>
> 
>   * sysdeps/unix/sysv/linux/linux_fsinfo.h (SHMFS_SUPER_MAGIC):
>   Update for real 2.4 kernels.

Yes, and I apologize to Ulrich that the changed number slipped through
to the official kernel. My fault.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: shm_open doesn't work (fix maybe).

2001-04-25 Thread Christoph Rohland

Hi,

On Tue, 24 Apr 2001, Jakub Jelinek wrote:
 On Tue, Apr 24, 2001 at 11:46:20AM -0500, Tom Brusehaver (N-Sysdyne
 Corporation) wrote:
 
 I have been chasing all around trying to find out why
 shm_open always returns ENOSYS. It is implemented
 in glibc-2.2.2, and seems the 2.4.3 kernel knows about
 shmfs.
 
 It seems the file linux/mm/shmem.c has:
 #define SHMEM_MAGIC 0x01021994
 
 And the glibc-2.2.2/sysdeps/unix/sysv/linux/linux_fsinfo.h has:
 #define SHMFS_SUPER_MAGIC 0x02011994
 
 Well, which is correct?
 
 Update your glibc, 2.2.3pre* matches 2.4.x kernel:
 
 2001-03-03  Ulrich Drepper  [EMAIL PROTECTED]
 
   * sysdeps/unix/sysv/linux/linux_fsinfo.h (SHMFS_SUPER_MAGIC):
   Update for real 2.4 kernels.

Yes, and I apologize to Ulrich that the changed number slipped through
to the official kernel. My fault.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-25 Thread Christoph Rohland

Hi Andreas,

On Tue, 24 Apr 2001, Andreas Dilger wrote:
 On the other hand, sockets and shmem are both relatively large...

shmem is only large because the union is large. I introduced the
direct swap array of size SHMEM_NR_DIRECT simply to take advantage of
the union. We can decrease SHMEM_NR_DIRECT very easily. I am thinking
about 1 or 5 which would mean that we allocate an indirect block for
files bigger than 4k or 20k respectively.

The shmem_inode_info would then be 8 or 12 words.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-24 Thread Christoph Rohland

Hi Al,

On Tue, 24 Apr 2001, Alexander Viro wrote:
> So yes, IMO having such patches available _is_ a good thing. And in
> 2.5 we definitely want them in the tree. If encapsulation part gets
> there during 2.4 and separate allocation is available for all of
> them it will be easier to do without PITA in process.

OK I will do that for tmpfs soon. And I will do the symlink inlining
with that patch.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-24 Thread Christoph Rohland

Hi Al,

On Tue, 24 Apr 2001, Alexander Viro wrote:
>> Half an hour? If it takes more than about 5 minutes for JFFS2 I'd
>> be very surprised.
> 
>  What's stopping you? 
> You _are_ JFFS maintainer, aren't you?

So is this the start to change all filesystems in 2.4? I am not sure
we should do that. 

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-24 Thread Christoph Rohland

Hi Alexander,

On Mon, 23 Apr 2001, Alexander Viro wrote:
>> I like it. ext2fs does the same, so there should be no VFS
>> hassles involved. Al?
> 
> We should get ext2 and friends to move the sucker _out_ of struct
> inode.  As it is, sizeof(struct inode) is way too large. This is 2.5
> stuff, but it really has to be done. More filesystems adding stuff
> into the union is a Bad Thing(tm). If you want to allocates space -
> allocate if yourself; ->clear_inode() is the right place for freeing
> it.

Yes, I agree that the union is way too large and I did not plan to
extend it but simply use the size it has.

if (strlen(path) < sizeof(inode->u))
inline the symlink;
else
put it into the page cache;

So if somebody really cleans up the private inode structures it will
not trigger that often any more and we perhaps have to rethink the
idea.

But also if we use struct shmem_inode_info which is 92 bytes right now
we would inline all symlinks on my machine.

If we reduced its size to 32 (which could be easily done) we would
still inline 6642 out of 9317 symlinks on my machine. That's not bad.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-24 Thread Christoph Rohland

Hi Alexander,

On Mon, 23 Apr 2001, Alexander Viro wrote:
>> I like it. ext2fs does the same, so there should be no VFS
>> hassles involved. Al?
> 
> We should get ext2 and friends to move the sucker _out_ of struct
> inode.  As it is, sizeof(struct inode) is way too large. This is 2.5
> stuff, but it really has to be done. More filesystems adding stuff
> into the union is a Bad Thing(tm). If you want to allocates space -
> allocate if yourself; ->clear_inode() is the right place for freeing
> it.

Yes, I agree that the union is way too large and I did not plan to
extend it but simply use the size it has.

if (strlen(path) < sizeof(inode->u))
inline the symlink;
else
put it into the page cache;

So if somebody really cleans up the private inode structures it will
not trigger that often any more and we perhaps have to rethink the
idea.

But also if we use struct shmem_inode_info which is 92 bytes right now
we would inline all symlinks on my machine.

If we reduced its size to 32 (which could be easily done) we would
still inline 6642 out of 9317 symlinks on my machine. That's not bad.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-24 Thread Christoph Rohland

Hi Al,

On Tue, 24 Apr 2001, Alexander Viro wrote:
>> Half an hour? If it takes more than about 5 minutes for JFFS2 I'd
>> be very surprised.
> 
> <tone polite> What's stopping you? </tone>
> You _are_ JFFS maintainer, aren't you?

So is this the start to change all filesystems in 2.4? I am not sure
we should do that. 

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-24 Thread Christoph Rohland

Hi Al,

On Tue, 24 Apr 2001, Alexander Viro wrote:
 So yes, IMO having such patches available _is_ a good thing. And in
 2.5 we definitely want them in the tree. If encapsulation part gets
 there during 2.4 and separate allocation is available for all of
 them it will be easier to do without PITA in process.

OK I will do that for tmpfs soon. And I will do the symlink inlining
with that patch.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-23 Thread Christoph Rohland

Hi Ingo,

On Mon, 23 Apr 2001, Ingo Oeser wrote:
> On Mon, Apr 23, 2001 at 01:43:27PM +0200, Christoph Rohland wrote:
>> On Sun, 22 Apr 2001, David L. Parsley wrote:
>> > attach packages inside it.  Since symlinks in a tmpfs filesystem
>> > cost 4k each (ouch!), I'm considering using mount --bind for
>> > everything.
>> 
>> What about fixing tmpfs instead?
> 
> The question is: How? If you do it like ramfs, you cannot swap
> these symlinks and this is effectively a mlock(symlink) operation
> allowed for normal users. -> BAD!

How about storing it into the inode structure if it fits into the
fs-private union? If it is too big we allocate the page as we do it
now. The union has 192 bytes. This should be sufficient for most
cases.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-23 Thread Christoph Rohland

Hi David,

On Sun, 22 Apr 2001, David L. Parsley wrote:
> I'm still working on a packaging system for diskless
> (quasi-embedded) devices.  The root filesystem is all tmpfs, and I
> attach packages inside it.  Since symlinks in a tmpfs filesystem
> cost 4k each (ouch!), I'm considering using mount --bind for
> everything.

What about fixing tmpfs instead?

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-23 Thread Christoph Rohland

Hi David,

On Sun, 22 Apr 2001, David L. Parsley wrote:
 I'm still working on a packaging system for diskless
 (quasi-embedded) devices.  The root filesystem is all tmpfs, and I
 attach packages inside it.  Since symlinks in a tmpfs filesystem
 cost 4k each (ouch!), I'm considering using mount --bind for
 everything.

What about fixing tmpfs instead?

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: hundreds of mount --bind mountpoints?

2001-04-23 Thread Christoph Rohland

Hi Ingo,

On Mon, 23 Apr 2001, Ingo Oeser wrote:
> On Mon, Apr 23, 2001 at 01:43:27PM +0200, Christoph Rohland wrote:
>> On Sun, 22 Apr 2001, David L. Parsley wrote:
>> > attach packages inside it.  Since symlinks in a tmpfs filesystem
>> > cost 4k each (ouch!), I'm considering using mount --bind for
>> > everything.
>> 
>> What about fixing tmpfs instead?
> 
> The question is: How? If you do it like ramfs, you cannot swap
> these symlinks and this is effectively a mlock(symlink) operation
> allowed for normal users. -> BAD!

How about storing it into the inode structure if it fits into the
fs-private union? If it is too big we allocate the page as we do it
now. The union has 192 bytes. This should be sufficient for most
cases.

Greetings
Christoph


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [NEED TESTERS] remove swapin_readahead Re: shmem_getpage_locked() / swapin_readahead() race in 2.4.4-pre3

2001-04-18 Thread Christoph Rohland

Hi Stephen,

On Tue, 17 Apr 2001, Stephen C. Tweedie wrote:
> I don't see the problem.  shmem_getpage_locked appears to back off
> correctly if it encounters a swap-cached page already existing if
> swapin_readahead has installed the page first, at least with the
> code in 2.4.3-ac5.

But the swap count can be increased by anybody without having the page
lock. So the check triggers and is bogus. See my old patch.

> There *does* appear to be a race, but it's swapin_readahead racing
> with shmem_writepage.  That code does not search for an existing
> entry in the swap cache when it decides to move a shmem page to
> swap, so we can install the page twice and end up doing a lookup on
> the wrong physical page if there is swap readahead going on.

I cannot follow you here. How can we have a swap cache entry if there
is no swap entry. . . . Oh stop you mean swapin_readahead does swap in
some totally bogus page into the swap cache after we did
__get_swap_page? I never thought about that!

> To fix that, shmem_writepage needs to do a swap cache lookup and
> lock before installing the new page --- it should probably just copy
> the new page into the old one if it finds one already there.

OK I will look into that.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [NEED TESTERS] remove swapin_readahead Re: shmem_getpage_locked() / swapin_readahead() race in 2.4.4-pre3

2001-04-18 Thread Christoph Rohland

Hi Stephen,

On Tue, 17 Apr 2001, Stephen C. Tweedie wrote:
 I don't see the problem.  shmem_getpage_locked appears to back off
 correctly if it encounters a swap-cached page already existing if
 swapin_readahead has installed the page first, at least with the
 code in 2.4.3-ac5.

But the swap count can be increased by anybody without having the page
lock. So the check triggers and is bogus. See my old patch.

 There *does* appear to be a race, but it's swapin_readahead racing
 with shmem_writepage.  That code does not search for an existing
 entry in the swap cache when it decides to move a shmem page to
 swap, so we can install the page twice and end up doing a lookup on
 the wrong physical page if there is swap readahead going on.

I cannot follow you here. How can we have a swap cache entry if there
is no swap entry. . . . Oh stop you mean swapin_readahead does swap in
some totally bogus page into the swap cache after we did
__get_swap_page? I never thought about that!

 To fix that, shmem_writepage needs to do a swap cache lookup and
 lock before installing the new page --- it should probably just copy
 the new page into the old one if it finds one already there.

OK I will look into that.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: shmem_getpage_locked() / swapin_readahead() race in 2.4.4-pre3

2001-04-15 Thread Christoph Rohland

Hi,

On Sat, 14 Apr 2001, Marcelo Tosatti wrote:
> There is a nasty race between shmem_getpage_locked() and
> swapin_readahead() with the new shmem code (introduced in 2.4.3-ac3
> and merged in the main tree in 2.4.4-pre3):
> 
> shmem_getpage_locked() finds a page in the swapcache and moves it to
> the pagecache as an shmem page, freeing the swapcache and the swap
> map entry for this page. (which causes a BUG() in mm/shmem.c:353
> since the swap map entry is being used)
> 
> In the meanwhile, swapin_readahead() is allocating a page and adding
> it to the swapcache.

Oh, I was just chasing this also. 

> I don't see any clean fix for this one.

I think the actual check for swap_count is not necessary: If
swapin_readahead allocates a new swap_cache page for the entry, that's
not a real bug. On memory pressure this page will be reclaimed.

Actually we have to make shmem much more unfriendly to the swap cache
to make it correct: I think we have to drop the whole "drop swap cache
pages on truncate" logic, since it uses lookup_swap_cache and
delete_from_swap_cache, which both lock the page, while holding a
spinlock :-(

The appended patch implements both changes and relies on the page
stealer to shrink the swap cache. 

It also integrates fixes which Marcelo did send earlier.

Greetings
Christoph



--- 2.4.4-pre3/mm/shmem.c   Sat Apr 14 11:12:54 2001
+++ u2.4.3/mm/shmem.c   Sun Apr 15 13:45:58 2001
@@ -123,10 +123,19 @@
entry = *ptr;
*ptr = (swp_entry_t){0};
freed++;
+#if 0
+/*
+* This does not work since it may sleep while holding
+* a spinlock 
+*
+* We rely on the page stealer to free up the
+* allocated swap space later
+*/
if ((page = lookup_swap_cache(entry)) != NULL) {
delete_from_swap_cache(page);
page_cache_release(page);   
}
+#endif
swap_free (entry);
}
return freed;
@@ -236,8 +245,10 @@

/* Only move to the swap cache if there are no other users of
 * the page. */
-   if (atomic_read(&page->count) > 2)
-   goto out;
+   if (atomic_read(&page->count) > 2){
+   set_page_dirty(page);
+   goto out;
+   }

inode = page->mapping->host;
info = &inode->u.shmem_i;
@@ -348,9 +359,6 @@
if (TryLockPage(page)) 
goto wait_retry;
 
-   if (swap_count(page) > 2)
-   BUG();
-   
swap_free(*entry);
*entry = (swp_entry_t) {0};
delete_from_swap_cache_nolock(page);
@@ -432,6 +440,7 @@
*ptr = NOPAGE_SIGBUS;
return error;
 sigbus:
+   up (&inode->i_sem);
*ptr = NOPAGE_SIGBUS;
return -EFAULT;
 }



Re: shmem_getpage_locked() / swapin_readahead() race in 2.4.4-pre3

2001-04-15 Thread Christoph Rohland

Hi,

On Sat, 14 Apr 2001, Marcelo Tosatti wrote:
 There is a nasty race between shmem_getpage_locked() and
 swapin_readahead() with the new shmem code (introduced in 2.4.3-ac3
 and merged in the main tree in 2.4.4-pre3):
 
 shmem_getpage_locked() finds a page in the swapcache and moves it to
 the pagecache as an shmem page, freeing the swapcache and the swap
 map entry for this page. (which causes a BUG() in mm/shmem.c:353
 since the swap map entry is being used)
 
 In the meanwhile, swapin_readahead() is allocating a page and adding
 it to the swapcache.

Oh, I was just chasing this also. 

 I don't see any clean fix for this one.

I think the actual check for swap_count is not necessary: If
swapin_readahead allocates a new swap_cache page for the entry, that's
not a real bug. On memory pressure this page will be reclaimed.

Actually we have to make shmem much more unfriendly to the swap cache
to make it correct: I think we have to drop the whole "drop swap cache
pages on truncate" logic, since it uses lookup_swap_cache and
delete_from_swap_cache, which both lock the page, while holding a
spinlock :-(

The appended patch implements both changes and relies on the page
stealer to shrink the swap cache. 

It also integrates fixes which Marcelo did send earlier.

Greetings
Christoph



--- 2.4.4-pre3/mm/shmem.c   Sat Apr 14 11:12:54 2001
+++ u2.4.3/mm/shmem.c   Sun Apr 15 13:45:58 2001
@@ -123,10 +123,19 @@
entry = *ptr;
*ptr = (swp_entry_t){0};
freed++;
+#if 0
+/*
+* This does not work since it may sleep while holding
+* a spinlock 
+*
+* We rely on the page stealer to free up the
+* allocated swap space later
+*/
if ((page = lookup_swap_cache(entry)) != NULL) {
delete_from_swap_cache(page);
page_cache_release(page);   
}
+#endif
swap_free (entry);
}
return freed;
@@ -236,8 +245,10 @@

/* Only move to the swap cache if there are no other users of
 * the page. */
-   if (atomic_read(&page->count) > 2)
-   goto out;
+   if (atomic_read(&page->count) > 2){
+   set_page_dirty(page);
+   goto out;
+   }

inode = page->mapping->host;
info = &inode->u.shmem_i;
@@ -348,9 +359,6 @@
if (TryLockPage(page)) 
goto wait_retry;
 
-   if (swap_count(page) > 2)
-   BUG();
-   
swap_free(*entry);
*entry = (swp_entry_t) {0};
delete_from_swap_cache_nolock(page);
@@ -432,6 +440,7 @@
*ptr = NOPAGE_SIGBUS;
return error;
 sigbus:
+   up (&inode->i_sem);
*ptr = NOPAGE_SIGBUS;
return -EFAULT;
 }



Re: 2.4.3-ac2 -- How do I determine if shm is being used?

2001-04-08 Thread Christoph Rohland

Hi Miles,

On Sat, 07 Apr 2001, Miles Lane wrote:
> I have mounted:
> 
>   none on /var/shm type shm (rw)

Not necessary any more.

>   tmpfs on /dev/shm type tmpfs (rw)

Also not necessary, but recommended for POSIX shm. BTW it will not
work with Linus' kernel. Type "shm" is supported by both versions.

> X Error of failed request: BadValue (integer parameter out of range
> for operation)
>   Major opcode of failed request:  146 (MIT-SHM)
>   Minor opcode of failed request:  3 (X_ShmPutImage)
>   Value in failed request:  0x161
>   Serial number of failed request:  35107
>   Current serial number in output stream:  35111

Unfortunately this does not tell what it wanted to do.

> I'd like to check to make sure that shm is actually accessible
> to my programs.  Is there any easy way to do this?

ipcs should be your friend. Especially 'ipcs -lm'.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: 2.4.3-ac2 -- How do I determine if shm is being used?

2001-04-08 Thread Christoph Rohland

Hi Miles,

On Sat, 07 Apr 2001, Miles Lane wrote:
 I have mounted:
 
   none on /var/shm type shm (rw)

Not necessary any more.

   tmpfs on /dev/shm type tmpfs (rw)

Also not necessary, but recommended for POSIX shm. BTW it will not
work with Linus' kernel. Type "shm" is supported by both versions.

 X Error of failed request: BadValue (integer parameter out of range
 for operation)
   Major opcode of failed request:  146 (MIT-SHM)
   Minor opcode of failed request:  3 (X_ShmPutImage)
   Value in failed request:  0x161
   Serial number of failed request:  35107
   Current serial number in output stream:  35111

Unfortunately this does not tell what it wanted to do.

 I'd like to check to make sure that shm is actually accessible
 to my programs.  Is there any easy way to do this?

ipcs should be your friend. Especially 'ipcs -lm'.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs in 2.4.3 and AC

2001-03-31 Thread Christoph Rohland

On Fri, 30 Mar 2001, [EMAIL PROTECTED] wrote:
> tmpfs (or shmfs or whatever name you like) is still different in
> official series (2.4.3) and in ac series. Its a kick in the ass for
> multiboot, as offcial 2.4.3 does not recognise 'tmpfs' in fstab:
> 
> shmfs  /dev/shm  tmpfs   ...

Use type shm. It works in both versions.

> Any reason, or is because it has been forgotten ?

Alan picked up the tmpfs extensions. Linus didn't.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs in 2.4.3 and AC

2001-03-31 Thread Christoph Rohland

On Fri, 30 Mar 2001, [EMAIL PROTECTED] wrote:
 tmpfs (or shmfs or whatever name you like) is still different in
 official series (2.4.3) and in ac series. Its a kick in the ass for
 multiboot, as offcial 2.4.3 does not recognise 'tmpfs' in fstab:
 
> shmfs  /dev/shm  tmpfs   ...

Use type shm. It works in both versions.

 Any reason, or is because it has been forgotten ?

Alan picked up the tmpfs extensions. Linus didn't.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs: a way to get your system down

2001-03-27 Thread Christoph Rohland

Hi Alex,

On Sat, 24 Mar 2001, Alex Riesen wrote:
> just hit by tmpfs on 2.4.2-ac20
> 
> mount -t tmpfs mnt
> dd if=/dev/zero mnt/tmpfile
> 
> resulted in hardly slowed system and lockup,
> and not in "No space left on device", as expected.

Use mount option "size". The default is unlimited...

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: tmpfs: a way to get your system down

2001-03-27 Thread Christoph Rohland

Hi Alex,

On Sat, 24 Mar 2001, Alex Riesen wrote:
 just hit by tmpfs on 2.4.2-ac20
 
 mount -t tmpfs mnt
 dd if=/dev/zero mnt/tmpfile
 
 resulted in hardly slowed system and lockup,
 and not in "No space left on device", as expected.

Use mount option "size". The default is unlimited...

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: 2.4 and 2GB swap partition limit

2001-03-05 Thread Christoph Rohland

Hi Matt,

On Sun, 4 Mar 2001, Matt Domsch wrote:
> My concern is that if there continues to be a 2GB swap
> partition/file size limitation, and you can have (as currently
> #defined) 8 swap partitions, you're limited to 16GB swap, which then
> follows a max of 8GB RAM.  We'd like to sell servers with 32GB or
> 64GB RAM to customers who request such for their applications.  Such
> customers generally have no problem purchasing additional disks to
> be used for swap, likely on a hardware RAID controller.

I did think about that too and I also think the 2GB limit is not
appropriate for the big servers. But I do not believe that you need so
much swap on these machines. If you drive a 32 GB machine so heavily
into swap it is more busy finding the pages to swap than doing
anything really interesting. (At least that's my experience)

BTW often these big servers run databases and application servers
which have most of their memory in shared memory. Shared memory does
free the swap entries on swapin. (I thought about changing that but as
long as we have no garbage collection for idle swap entries I will not
do it)

On any loaded server you have to check the swap space requirements
regularly and adjust to your needs. But to setup more than let's
say 8GB swap is a waste of resource IMHO.

> We've also seen (anecdotal evidence here) cases where a kernel
> panics, which we believe may have to do with having 0 < swap < 2x
> RAM.  We're investigating further.

That would be a kernel bug which should be fixed. The kernel should
handle oom/oos.

>> Actually the deal is: either use enough swap (about 2x RAM) or use
>> none at all. 
> 
> If swap space isn't required in all cases, great!  We'll encourage
> the use of swap files as needed, rather than swap partitions.  But,
> if instead you *require* swap = 2x RAM, then the 2GB swap size
> limitation must go.

No it is not strictly required.

But still the 2GB limit is annoying and together with the
arch-independent maximum number of swap partitions/files it is pretty
dumb. 

So I would propose to first make a small patch to make MAX_SWAPFILES
arch-dependent and bigger. (x86 would allow a much higher
MAX_SWAPFILES)

For 2.5 we could perhaps think about a new swapfile layout which
allows bigger partitions.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: 2.4 and 2GB swap partition limit

2001-03-05 Thread Christoph Rohland

Hi Matt,

On Sun, 4 Mar 2001, Matt Domsch wrote:
 My concern is that if there continues to be a 2GB swap
 partition/file size limitation, and you can have (as currently
 #defined) 8 swap partitions, you're limited to 16GB swap, which then
 follows a max of 8GB RAM.  We'd like to sell servers with 32GB or
 64GB RAM to customers who request such for their applications.  Such
 customers generally have no problem purchasing additional disks to
 be used for swap, likely on a hardware RAID controller.

I did think about that too and I also think the 2GB limit is not
appropriate for the big servers. But I do not believe that you need so
much swap on these machines. If you drive a 32 GB machine so heavily
into swap it is more busy finding the pages to swap than doing
anything really interesting. (At least that's my experience)

BTW often these big servers run databases and application servers
which have most of their memory in shared memory. Shared memory does
free the swap entries on swapin. (I thought about changing that but as
long as we have no garbage collection for idle swap entries I will not
do it)

On any loaded server you have to check the swap space requirements
regularly and adjust to your needs. But to setup more than let's
say 8GB swap is a waste of resource IMHO.

> We've also seen (anecdotal evidence here) cases where a kernel
> panics, which we believe may have to do with having 0 < swap < 2x
> RAM.  We're investigating further.

That would be a kernel bug which should be fixed. The kernel should
handle oom/oos.

>> Actually the deal is: either use enough swap (about 2x RAM) or use
>> none at all. 
> 
> If swap space isn't required in all cases, great!  We'll encourage
> the use of swap files as needed, rather than swap partitions.  But,
> if instead you *require* swap = 2x RAM, then the 2GB swap size
> limitation must go.

No it is not strictly required.

But still the 2GB limit is annoying and together with the
arch-independent maximum number of swap partitions/files it is pretty
dumb. 

So I would propose to first make a small patch to make MAX_SWAPFILES
arch-dependent and bigger. (x86 would allow a much higher
MAX_SWAPFILES)

For 2.5 we could perhaps think about a new swapfile layout which
allows bigger partitions.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Kernel is unstable

2001-03-02 Thread Christoph Rohland

Hi Linus,

On 1 Mar 2001, Linus Torvalds wrote:
> Note how do_brk() does the merging itself (see the comment "Can we
> just expand an old anonymous mapping?"), and that it's basically
> free when done that way, with no worries about locking etc. The same
> could be done fairly trivially in mmap too, but I never saw any real
> usage patterns that made it look all that worthwhile (*). Handling
> the mmap case the same way do_brk() does it would fix the behaviour
> of this pathological example too..

Oh there is at least one application, which does trigger the merging
quite often: SAP R/3. We have a big memory area which is handled in 1M
blocks which get mmaped/munmapped/mprotected all the time. This now
leads to a really big avl tree which before has been much smaller.

I am not sure that the merging is a gain since it in itself is an
overhead and we work on fixed blocks. I simply wanted to point out
that there are applications out there which trigger it.

Greetings
Christoph

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Kernel is unstable

2001-03-02 Thread Christoph Rohland

Hi Linus,

On 1 Mar 2001, Linus Torvalds wrote:
> Note how do_brk() does the merging itself (see the comment "Can we
> just expand an old anonymous mapping?"), and that it's basically
> free when done that way, with no worries about locking etc. The same
> could be done fairly trivially in mmap too, but I never saw any real
> usage patterns that made it look all that worthwhile (*). Handling
> the mmap case the same way do_brk() does it would fix the behaviour
> of this pathological example too..

Oh there is at least one application, which does trigger the merging
quite often: SAP R/3. We have a big memory area which is handled in 1M
blocks which get mmaped/munmapped/mprotected all the time. This now
leads to a really big avl tree which before has been much smaller.

I am not sure that the merging is a gain since it in itself is an
overhead and we work on fixed blocks. I simply wanted to point out
that there are applications out there which trigger it.

Greetings
Christoph

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] make file times work in tmpfs

2001-02-14 Thread Christoph Rohland

Hi Alan,

here is a patch that makes the different file timestamps work on
tmpfs.

Greetings
Christoph

--- mac10/mm/shmem.c.orig   Wed Feb 14 14:39:46 2001
+++ mac10/mm/shmem.cWed Feb 14 15:30:09 2001
@@ -160,6 +160,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = &inode->u.shmem_i;
 
+   inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (&info->lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (index > info->max_index)
@@ -734,6 +735,7 @@
struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
int error = -ENOSPC;
 
+   dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
@@ -767,6 +769,7 @@
if (S_ISDIR(inode->i_mode))
return -EPERM;
 
+   inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inode->i_nlink++;
atomic_inc(&inode->i_count);/* New dentry reference */
dget(dentry);   /* Extra pinning count for the created dentry */
@@ -809,7 +812,9 @@
 
 static int shmem_unlink(struct inode * dir, struct dentry *dentry)
 {
-   dentry->d_inode->i_nlink--;
+   struct inode *inode = dentry->d_inode;
+   inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+   inode->i_nlink--;
dput(dentry);   /* Undo the count from "create" - this does all the work */
return 0;
 }
@@ -836,10 +841,12 @@
if (shmem_empty(new_dentry)) {
struct inode *inode = new_dentry->d_inode;
if (inode) {
+   inode->i_ctime = CURRENT_TIME;
inode->i_nlink--;
dput(new_dentry);
}
error = 0;
+   old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
}
return error;
 }
@@ -873,6 +880,7 @@
UnlockPage(page);
page_cache_release(page);
up(&inode->i_sem);
+   dir->i_ctime = dir->i_mtime = CURRENT_TIME;
return 0;
 fail:
up(&inode->i_sem);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] make file times work in tmpfs

2001-02-14 Thread Christoph Rohland

Hi Alan,

here is a patch that makes the different file timestamps work on
tmpfs.

Greetings
Christoph

--- mac10/mm/shmem.c.orig   Wed Feb 14 14:39:46 2001
+++ mac10/mm/shmem.cWed Feb 14 15:30:09 2001
@@ -160,6 +160,7 @@
swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = &inode->u.shmem_i;
 
+   inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (&info->lock);
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (index > info->max_index)
@@ -734,6 +735,7 @@
struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
int error = -ENOSPC;
 
+   dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
@@ -767,6 +769,7 @@
if (S_ISDIR(inode->i_mode))
return -EPERM;
 
+   inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inode->i_nlink++;
atomic_inc(&inode->i_count);/* New dentry reference */
dget(dentry);   /* Extra pinning count for the created dentry */
@@ -809,7 +812,9 @@
 
 static int shmem_unlink(struct inode * dir, struct dentry *dentry)
 {
-   dentry->d_inode->i_nlink--;
+   struct inode *inode = dentry->d_inode;
+   inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+   inode->i_nlink--;
dput(dentry);   /* Undo the count from "create" - this does all the work */
return 0;
 }
@@ -836,10 +841,12 @@
if (shmem_empty(new_dentry)) {
struct inode *inode = new_dentry->d_inode;
if (inode) {
+   inode->i_ctime = CURRENT_TIME;
inode->i_nlink--;
dput(new_dentry);
}
error = 0;
+   old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
}
return error;
 }
@@ -873,6 +880,7 @@
UnlockPage(page);
page_cache_release(page);
up(&inode->i_sem);
+   dir->i_ctime = dir->i_mtime = CURRENT_TIME;
return 0;
 fail:
up(&inode->i_sem);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[Patch] correct tmpfs link count for directories

2001-02-13 Thread Christoph Rohland

Hi Alan,

The attached patch makes tmpfs behave more like other fs's. Apparently
perl expects this.

Greetings
Christoph

diff -uNr 2.4.1-ac10/mm/shmem.c 2.4.1-ac10-nlink/mm/shmem.c
--- 2.4.1-ac10/mm/shmem.c   Mon Feb 12 15:01:47 2001
+++ 2.4.1-ac10-nlink/mm/shmem.c Tue Feb 13 13:48:49 2001
@@ -465,6 +465,7 @@
inode->i_fop = &shmem_file_operations;
break;
case S_IFDIR:
+   inode->i_nlink++;
inode->i_op = &shmem_dir_inode_operations;
inode->i_fop = &shmem_dir_operations;
break;
@@ -743,7 +744,12 @@
 
 static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 {
-   return shmem_mknod(dir, dentry, mode | S_IFDIR, 0);
+   int error;
+
+   if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
+   return error;
+   dir->i_nlink++;
+   return 0;
 }
 
 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
@@ -801,25 +807,21 @@
return 1;
 }
 
-/*
- * This works for both directories and regular files.
- * (non-directories will always have empty subdirs)
- */
 static int shmem_unlink(struct inode * dir, struct dentry *dentry)
 {
-   int retval = -ENOTEMPTY;
+   dentry->d_inode->i_nlink--;
+   dput(dentry);   /* Undo the count from "create" - this does all the work */
+   return 0;
+}
 
-   if (shmem_empty(dentry)) {
-   struct inode *inode = dentry->d_inode;
+static int shmem_rmdir(struct inode * dir, struct dentry *dentry)
+{
+   if (!shmem_empty(dentry))
+   return -ENOTEMPTY;
 
-   inode->i_nlink--;
-   dput(dentry);   /* Undo the count from "create" - this does all the work */
-   retval = 0;
-   }
-   return retval;
+   dir->i_nlink--;
+   return shmem_unlink(dir, dentry);
 }
-
-#define shmem_rmdir shmem_unlink
 
 /*
  * The VFS layer already does all the dentry stuff for rename,


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Linux 2.2.19pre10

2001-02-13 Thread Christoph Rohland

Hi Alan,

On Tue, 13 Feb 2001, Alan Cox wrote:
>> Yes, I understand that. But I never got any note that my fix is
>> broken and I still do not understand what's the concern.
> 
> Unless Im misreading the code the segment you poke at has
> potentially been freed before it is written to.

Oh yes I was blind, shame on me. Here comes a fixed version.

Greetings
Christoph

--- 2.2.19-pre10/ipc/shm.c.orig Tue Feb 13 14:35:25 2001
+++ 2.2.19-pre10/ipc/shm.c  Tue Feb 13 14:34:49 2001
@@ -337,6 +337,8 @@
if (current->euid == shp->u.shm_perm.uid ||
current->euid == shp->u.shm_perm.cuid || 
capable(CAP_SYS_ADMIN)) {
+   /* Do not find it any more */
+   shp->u.shm_perm.key = IPC_PRIVATE;
shp->u.shm_perm.mode |= SHM_DEST;
if (shp->u.shm_nattch <= 0)
killseg (id);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: Linux 2.2.19pre10

2001-02-13 Thread Christoph Rohland

Hi Alan,

On Tue, 13 Feb 2001, Alan Cox wrote:
>> No, I do not think that it's minor. We had to bring down running
>> application servers to be able to start another one, because the
>> new one couldn't create or attach the systemwide os-monitoring
>> segment and thus refused to start. That's very bad behaviour.
> 
> Well I'll take corrected fixes, but Im not going to hold up a release for it

Yes, I understand that. But I never got any note that my fix is broken
and I still do not understand what's the concern. 

We are holding the BKL while doing this. And if shm_close does not get
called with it we should probably acquire it.

Greetings
Christoph


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



  1   2   3   4   >