Hi Linus,

The following patch (against a clean test13-pre4) removes the races in
shmem_unuse. I changed inode.c so that the inode is not locked if the
superblock has no write_inode function; that way I can grab the inode
while holding the spinlock.
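
In other words, an inode on a filesystem without a write_inode
operation is no longer marked I_LOCK by the sync path, so grabbing it
from under a spinlock cannot block on I/O. Condensed from the inode.c
hunk below, the patched path boils down to this (a sketch with
comments added; the surrounding function is unchanged):

        if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode) {
                /* Lock the inode only while we call out to the fs. */
                inode->i_state |= I_LOCK;
                inode->i_state &= ~I_DIRTY;
                spin_unlock(&inode_lock);

                inode->i_sb->s_op->write_inode(inode, sync);

                spin_lock(&inode_lock);
                inode->i_state &= ~I_LOCK;
                wake_up(&inode->i_wait);
                return;
        }
        /* No write_inode: just clear the dirty state, never set I_LOCK. */
        inode->i_state &= ~I_DIRTY;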

It also optimises the shmem_truncate behaviour.
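
The optimisation: shmem_inode_info gains a high-water mark (max_index,
see the shmem_fs.h hunk) so that shmem_truncate walks only the
indirect pages which can actually contain swap entries, and a truncate
that does not shrink the file returns immediately. Condensed from the
mm/shmem.c hunk below:

        spin_lock (&info->lock);
        index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        if (index >= info->max_index) {
                /* Not shrinking: record the new mark and get out. */
                info->max_index = index;
                spin_unlock (&info->lock);
                return;
        }
        /* ... free the swap entries, scanning the indirect blocks
           only up to max_index instead of all ENTRIES_PER_PAGE ... */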

BTW: the generic swapoff path itself still has races if a process is
paging in a page whose swap entry has just been freed by try_to_unuse.
This gives 'VM: bad swap entries' and worse, but it is not shmem
specific. Marcelo, would you like to look into this?
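
The window looks roughly like this (a sketch only; the exact call
sites in this tree may differ):

        faulting process                   swapoff
        ----------------                   -------
        do_swap_page()
          reads the swap entry
          from the pte
                                           try_to_unuse() pages the
                                           entry in, updates the pte
                                           and frees the swap entry
          looks up / duplicates the
          now-stale entry
            -> 'VM: bad swap entries'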

Greetings
                Christoph

--- 4-13-4/fs/inode.c   Sun Dec 17 12:54:00 2000
+++ c/fs/inode.c        Wed Dec 27 11:02:59 2000
@@ -168,13 +168,6 @@
                __wait_on_inode(inode);
 }
 
-
-static inline void write_inode(struct inode *inode, int sync)
-{
-       if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
-               inode->i_sb->s_op->write_inode(inode, sync);
-}
-
 static inline void __iget(struct inode * inode)
 {
        if (atomic_read(&inode->i_count)) {
@@ -202,16 +195,20 @@
                list_add(&inode->i_list, atomic_read(&inode->i_count)
                                                        ? &inode_in_use
                                                        : &inode_unused);
-               /* Set I_LOCK, reset I_DIRTY */
-               inode->i_state |= I_LOCK;
-               inode->i_state &= ~I_DIRTY;
-               spin_unlock(&inode_lock);
+               if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode) {
+                        /* Set I_LOCK, reset I_DIRTY */
+                        inode->i_state |= I_LOCK;
+                        inode->i_state &= ~I_DIRTY;
+                        spin_unlock(&inode_lock);
 
-               write_inode(inode, sync);
+                        inode->i_sb->s_op->write_inode(inode, sync);
 
-               spin_lock(&inode_lock);
-               inode->i_state &= ~I_LOCK;
-               wake_up(&inode->i_wait);
+                        spin_lock(&inode_lock);
+                        inode->i_state &= ~I_LOCK;
+                        wake_up(&inode->i_wait);
+                        return;
+                }
+                inode->i_state &= ~I_DIRTY;
        }
 }
 
diff -uNr 4-13-4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 4-13-4/include/linux/shmem_fs.h     Fri Dec 22 10:05:38 2000
+++ c/include/linux/shmem_fs.h  Tue Dec 26 18:52:29 2000
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
        spinlock_t      lock;
+       unsigned long   max_index;
        swp_entry_t     i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
        swp_entry_t   **i_indirect; /* doubly indirect blocks */
        unsigned long   swapped;
diff -uNr 4-13-4/mm/shmem.c c/mm/shmem.c
--- 4-13-4/mm/shmem.c   Fri Dec 22 10:05:38 2000
+++ c/mm/shmem.c        Tue Dec 26 20:00:32 2000
@@ -51,11 +51,16 @@
 
 static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index)
 {
+       unsigned long offset;
+
        if (index < SHMEM_NR_DIRECT)
                return info->i_direct+index;
 
        index -= SHMEM_NR_DIRECT;
-       if (index >= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
+       offset = index % ENTRIES_PER_PAGE;
+       index /= ENTRIES_PER_PAGE;
+
+       if (index >= ENTRIES_PER_PAGE)
                return NULL;
 
        if (!info->i_indirect) {
@@ -63,13 +68,13 @@
                if (!info->i_indirect)
                        return NULL;
        }
-       if(!(info->i_indirect[index/ENTRIES_PER_PAGE])) {
-               info->i_indirect[index/ENTRIES_PER_PAGE] = (swp_entry_t *) get_zeroed_page(GFP_USER);
-               if (!info->i_indirect[index/ENTRIES_PER_PAGE])
+       if(!(info->i_indirect[index])) {
+               info->i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
+               if (!info->i_indirect[index])
                        return NULL;
        }
        
-       return info->i_indirect[index/ENTRIES_PER_PAGE]+index%ENTRIES_PER_PAGE;
+       return info->i_indirect[index]+offset;
 }
 
 static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
@@ -99,7 +104,6 @@
  * @dir:       pointer to swp_entries 
  * @size:      number of entries in dir
  * @start:     offset to start from
- * @inode:     inode for statistics
  * @freed:     counter for freed pages
  *
  * It frees the swap entries from dir+start til dir+size
@@ -109,7 +113,7 @@
 
 static unsigned long 
 shmem_truncate_part (swp_entry_t * dir, unsigned long size, 
-                    unsigned long start, struct inode * inode, unsigned long *freed) {
+                    unsigned long start, unsigned long *freed) {
        if (start > size)
                return start - size;
        if (dir)
@@ -121,21 +125,27 @@
 static void shmem_truncate (struct inode * inode)
 {
        int clear_base;
-       unsigned long start;
+       unsigned long index, start;
        unsigned long mmfreed, freed = 0;
-       swp_entry_t **base, **ptr;
+       swp_entry_t **base, **ptr, **last;
        struct shmem_inode_info * info = &inode->u.shmem_i;
 
        spin_lock (&info->lock);
-       start = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       if (index >= info->max_index) {
+               info->max_index = index;
+               spin_unlock (&info->lock);
+               return;
+       }
 
-       start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, start, inode, &freed);
+       start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, index, &freed);
 
        if (!(base = info->i_indirect))
-               goto out;;
+               goto out;
 
        clear_base = 1;
-       for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
+       last = base + ((info->max_index - SHMEM_NR_DIRECT + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE);
+       for (ptr = base; ptr < last; ptr++) {
                if (!start) {
                        if (!*ptr)
                                continue;
@@ -145,16 +155,16 @@
                        continue;
                }
                clear_base = 0;
-               start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, inode, &freed);
+               start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, &freed);
        }
 
-       if (!clear_base) 
-               goto out;
-
-       free_page ((unsigned long)base);
-       info->i_indirect = 0;
+       if (clear_base) {
+               free_page ((unsigned long)base);
+               info->i_indirect = 0;
+       }
 
 out:
+       info->max_index = index;
 
        /*
         * We have to calculate the free blocks since we do not know
@@ -181,14 +191,14 @@
 {
        struct shmem_sb_info *info = &inode->i_sb->u.shmem_sb;
 
-       spin_lock (&shmem_ilock);
-       list_del (&inode->u.shmem_i.list);
-       spin_unlock (&shmem_ilock);
        inode->i_size = 0;
        shmem_truncate (inode);
        spin_lock (&info->stat_lock);
        info->free_inodes++;
        spin_unlock (&info->stat_lock);
+       spin_lock (&shmem_ilock);
+       list_del (&inode->u.shmem_i.list);
+       spin_unlock (&shmem_ilock);
        clear_inode(inode);
 }
 
@@ -215,21 +225,23 @@
        info = &((struct inode *)page->mapping->host)->u.shmem_i;
        if (info->locked)
                return 1;
+       spin_lock(&info->lock);
        swap = __get_swap_page(2);
-       if (!swap.val)
+       if (!swap.val) {
+               spin_unlock(&info->lock);
                return 1;
+       }
 
-       spin_lock(&info->lock);
-       entry = shmem_swp_entry (info, page->index);
+       entry = shmem_swp_entry(info, page->index);
        if (!entry)     /* this had been allocated on page allocation */
                BUG();
        error = -EAGAIN;
        if (entry->val) {
-                __swap_free(swap, 2);
+               __swap_free(swap, 2);
                goto out;
-        }
+       }
 
-        *entry = swap;
+       *entry = swap;
        error = 0;
        /* Remove the page from the page cache */
        lru_cache_del(page);
@@ -756,21 +768,21 @@
        return -1;
 }
 
-static int shmem_unuse_inode (struct inode *inode, swp_entry_t entry, struct page *page)
+static inline int shmem_unuse_inode (struct shmem_inode_info *info, struct address_space *mapping, swp_entry_t entry, struct page *page)
 {
        swp_entry_t **base, **ptr;
        unsigned long idx;
        int offset;
        
        idx = 0;
-       if ((offset = shmem_clear_swp (entry, inode->u.shmem_i.i_direct, SHMEM_NR_DIRECT)) >= 0)
+       if ((offset = shmem_clear_swp (entry, info->i_direct, SHMEM_NR_DIRECT)) >= 0)
                goto found;
 
        idx = SHMEM_NR_DIRECT;
-       if (!(base = inode->u.shmem_i.i_indirect))
+       if (!(base = info->i_indirect))
                return 0;
 
-       for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
+       for (ptr = base; idx <= info->max_index; ptr++) {
                if (*ptr &&
                    (offset = shmem_clear_swp (entry, *ptr, ENTRIES_PER_PAGE)) >= 0)
                        goto found;
@@ -778,35 +790,71 @@
        }
        return 0;
 found:
-       delete_from_swap_cache (page);
-       add_to_page_cache (page, inode->i_mapping, idx);
+       if (PageSwapCache(page))
+               BUG();
+       add_to_page_cache (page, mapping, offset + idx);
        SetPageDirty (page);
        SetPageUptodate (page);
        UnlockPage (page);
-       spin_lock (&inode->u.shmem_i.lock);
-       inode->u.shmem_i.swapped--;
-       spin_unlock (&inode->u.shmem_i.lock);
+       info->swapped--;
        return 1;
 }
 
+static struct inode * grab_next_inode(struct list_head **listp)
+{
+       struct inode *inode;
+
+       for (*listp = (*listp)->next; *listp != &shmem_inodes; *listp = (*listp)->next) {
+               inode = list_entry(*listp, struct inode, u.shmem_i.list);
+
+               if (igrab(inode))
+                       return inode;
+       }
+
+       return NULL;
+}
+
 /*
- * unuse_shmem() search for an eventually swapped out shmem page.
+ * shmem_unuse() searches for a possibly swapped-out shmem page.
  */
 void shmem_unuse(swp_entry_t entry, struct page *page)
 {
-       struct list_head *p;
-       struct inode * inode;
+       struct list_head *p = &shmem_inodes;
+       struct inode * inode, *next_inode = NULL;
+       struct shmem_inode_info *info;
+       int ret;
 
-       spin_lock (&shmem_ilock);
-       list_for_each(p, &shmem_inodes) {
-               inode = list_entry(p, struct inode, u.shmem_i.list);
+       /*
+        * This is tricky:
+        *
+        * - We cannot use a semaphore for the list since the delete
+        *   path would invert the lock order.
+        * - We need to hold the inode semaphore since the swap cache
+        *   has no atomic operations for getting an entry.
+        * - We cannot drop the inode while holding the lock.
+        *
+        * So we grab the current inode and the next one to make sure
+        * that the next pointer in the list stays valid.
+        */
 
-               if (shmem_unuse_inode(inode, entry, page))
+       spin_lock(&shmem_ilock);
+       for(inode = grab_next_inode(&p); inode; inode = next_inode) {
+               next_inode = grab_next_inode(&p);
+               info = &inode->u.shmem_i;
+               spin_unlock(&shmem_ilock);
+               down(&inode->i_sem);
+               spin_lock (&info->lock);
+               ret = shmem_unuse_inode(info, inode->i_mapping, entry, page);
+               spin_unlock (&info->lock);
+               up(&inode->i_sem);
+               iput (inode);
+               spin_lock(&shmem_ilock);
+               if (ret)
                        break;
        }
        spin_unlock (&shmem_ilock);
+       iput(next_inode);
 }
-
 
 /*
  * shmem_file_setup - get an unlinked file living in shmem fs
