mm: prevent concurrent unmap_mapping_range() on the same inode
author Miklos Szeredi <mszeredi@suse.cz>
Wed, 23 Feb 2011 12:49:47 +0000 (13:49 +0100)
committer Greg Kroah-Hartman <gregkh@suse.de>
Mon, 7 Mar 2011 23:05:08 +0000 (15:05 -0800)
commit 2aa15890f3c191326678f1bd68af61ec6b8753ec upstream.

Michael Leun reported that running parallel opens on a fuse filesystem
can trigger a "kernel BUG at mm/truncate.c:475".

Gurudas Pai reported the same bug on NFS.

The reason is that unmap_mapping_range() is not prepared for more than
one concurrent invocation per inode.  For example:

  thread1: going through a big range, stops in the middle of a vma and
     stores the restart address in vm_truncate_count.

  thread2: comes in with a small (e.g. single page) unmap request on
     the same vma, somewhere before the restart address, finds that the
     vma was already unmapped up to the restart address and happily
     returns without doing anything.

Another scenario would be two big unmap requests, both having to
restart the unmapping and each one setting vm_truncate_count to its
own value.  This could go on forever without any of them being able to
finish.

Truncate and hole punching already serialize with i_mutex.  Other
callers of unmap_mapping_range() do not, and it's difficult to get
i_mutex protection for all callers.  In particular ->d_revalidate(),
which calls invalidate_inode_pages2_range() in fuse, may be called
with or without i_mutex.

This patch adds a new mutex to 'struct address_space' to prevent
running multiple concurrent unmap_mapping_range() on the same mapping.

[ We'll hopefully get rid of all this with the upcoming mm
  preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex
  lockbreak" patch in particular.  But that is for 2.6.39 ]

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reported-by: Michael Leun <lkml20101129@newton.leun.net>
Reported-by: Gurudas Pai <gurudas.pai@oracle.com>
Tested-by: Gurudas Pai <gurudas.pai@oracle.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
fs/gfs2/main.c
fs/inode.c
fs/nilfs2/btnode.c
fs/nilfs2/btnode.h
fs/nilfs2/mdt.c
fs/nilfs2/page.h
fs/nilfs2/super.c
include/linux/fs.h
mm/memory.c

index ebef7ab6e17e4f55396888bce84287d1e5b4a240..f910999d15d5379ec7d97b8a25366baada5c76dc 100644 (file)
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
        struct address_space *mapping = (struct address_space *)(gl + 1);
 
        gfs2_init_glock_once(gl);
-       memset(mapping, 0, sizeof(*mapping));
-       INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
-       spin_lock_init(&mapping->tree_lock);
-       spin_lock_init(&mapping->i_mmap_lock);
-       INIT_LIST_HEAD(&mapping->private_list);
-       spin_lock_init(&mapping->private_lock);
-       INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
-       INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+       address_space_init_once(mapping);
 }
 
 /**
index ae2727ab0c3ab7695b14f256da0bccdfd3668d81..9fcc6189461a91bea4c6940613561455ebe10d5e 100644 (file)
@@ -280,6 +280,20 @@ static void destroy_inode(struct inode *inode)
                kmem_cache_free(inode_cachep, (inode));
 }
 
+void address_space_init_once(struct address_space *mapping)
+{
+       memset(mapping, 0, sizeof(*mapping));
+       INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+       spin_lock_init(&mapping->tree_lock);
+       spin_lock_init(&mapping->i_mmap_lock);
+       INIT_LIST_HEAD(&mapping->private_list);
+       spin_lock_init(&mapping->private_lock);
+       INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+       INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+       mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
+
 /*
  * These are initializations that only need to be done
  * once, because the fields are idempotent across use
@@ -293,13 +307,7 @@ void inode_init_once(struct inode *inode)
        INIT_LIST_HEAD(&inode->i_devices);
        INIT_LIST_HEAD(&inode->i_wb_list);
        INIT_LIST_HEAD(&inode->i_lru);
-       INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
-       spin_lock_init(&inode->i_data.tree_lock);
-       spin_lock_init(&inode->i_data.i_mmap_lock);
-       INIT_LIST_HEAD(&inode->i_data.private_list);
-       spin_lock_init(&inode->i_data.private_lock);
-       INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
-       INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+       address_space_init_once(&inode->i_data);
        i_size_ordered_init(inode);
 #ifdef CONFIG_FSNOTIFY
        INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
index 5115814cb74503bd4d34502817e9713a6f90541d..3b008b8bff04c79fd16e8f7c699674836dad789b 100644 (file)
 #include "btnode.h"
 
 
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
-       nilfs_mapping_init_once(btnc);
-}
-
 static const struct address_space_operations def_btnode_aops = {
        .sync_page              = block_sync_page,
 };
index 79037494f1e0408c42c41e3147204e480e765b4e..1b8ebd888c2844348a128a37e0781c1b82915aea 100644 (file)
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
        struct buffer_head *newbh;
 };
 
-void nilfs_btnode_cache_init_once(struct address_space *);
 void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
 void nilfs_btnode_cache_clear(struct address_space *);
 struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
index 39a5b84e2c9fbbac846ec50133b496798220373a..bdb8de601562623ddfc88933a23da06a01568349 100644 (file)
@@ -460,9 +460,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
        struct backing_dev_info *bdi = inode->i_sb->s_bdi;
 
        INIT_LIST_HEAD(&shadow->frozen_buffers);
-       nilfs_mapping_init_once(&shadow->frozen_data);
+       address_space_init_once(&shadow->frozen_data);
        nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
-       nilfs_mapping_init_once(&shadow->frozen_btnodes);
+       address_space_init_once(&shadow->frozen_btnodes);
        nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
        mi->mi_shadow = shadow;
        return 0;
index fb9e8a8a20384b5b78a6645d7b4809f0145e24d6..b7e27268e2e4b8d8ec4f5441f5e4a9baa7818487 100644 (file)
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
 int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
 void nilfs_mapping_init(struct address_space *mapping,
                        struct backing_dev_info *bdi,
                        const struct address_space_operations *aops);
index 2940a5853e30ed69ac0e9011901903589319a31a..388e26bcf69f76b31c86794338a006c7e53fa4e9 100644 (file)
@@ -1263,7 +1263,7 @@ static void nilfs_inode_init_once(void *obj)
 #ifdef CONFIG_NILFS_XATTR
        init_rwsem(&ii->xattr_sem);
 #endif
-       nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+       address_space_init_once(&ii->i_btnode_cache);
        ii->i_bmap = &ii->i_bmap_data;
        inode_init_once(&ii->vfs_inode);
 }
index 090f0eacde296ec52a06fe5eb1b13c07808b7ada..bd94dbeae5db830e7cde169ba9f95804f807e18b 100644 (file)
@@ -646,6 +646,7 @@ struct address_space {
        spinlock_t              private_lock;   /* for use by the address_space */
        struct list_head        private_list;   /* ditto */
        struct address_space    *assoc_mapping; /* ditto */
+       struct mutex            unmap_mutex;    /* to protect unmapping */
 } __attribute__((aligned(sizeof(long))));
        /*
         * On most architectures that alignment is already the case; but
@@ -2203,6 +2204,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
 
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
+extern void address_space_init_once(struct address_space *mapping);
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
index 02e48aa0ed136ff8e4d808d954a20d0b46e6d23d..e8b2f0380fb2e5658ca9828c7ea8243637dbc719 100644 (file)
@@ -2572,6 +2572,7 @@ void unmap_mapping_range(struct address_space *mapping,
                details.last_index = ULONG_MAX;
        details.i_mmap_lock = &mapping->i_mmap_lock;
 
+       mutex_lock(&mapping->unmap_mutex);
        spin_lock(&mapping->i_mmap_lock);
 
        /* Protect against endless unmapping loops */
@@ -2588,6 +2589,7 @@ void unmap_mapping_range(struct address_space *mapping,
        if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
                unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
        spin_unlock(&mapping->i_mmap_lock);
+       mutex_unlock(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);