mm: close PageTail race
authorDavid Rientjes <rientjes@google.com>
Mon, 3 Mar 2014 23:38:18 +0000 (15:38 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 3 Apr 2014 19:02:37 +0000 (12:02 -0700)
commit 668f9abbd4334e6c29fa8acd71635c4f9101caa7 upstream.

Commit bf6bddf1924e ("mm: introduce compaction and migration for
ballooned pages") introduces page_count(page) into memory compaction
which dereferences page->first_page if PageTail(page).

This results in a very rare NULL pointer dereference on the
aforementioned page_count(page).  Indeed, anything that does
compound_head(), including page_count() is susceptible to racing with
prep_compound_page() and seeing a NULL or dangling page->first_page
pointer.

This patch uses Andrea's implementation of compound_trans_head() that
deals with such a race and makes it the default compound_head()
implementation.  This includes a read memory barrier that ensures that
if PageTail(head) is true that we return a head page that is neither
NULL nor dangling.  The patch then adds a store memory barrier to
prep_compound_page() to ensure page->first_page is set.

This is the safest way to ensure we see the head page that we are
expecting, PageTail(page) is already in the unlikely() path and the
memory barriers are unfortunately required.

Hugetlbfs is the exception, we don't enforce a store memory barrier
during init since no race is possible.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Holger Kiehl <Holger.Kiehl@dwd.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/block/aoe/aoecmd.c
drivers/vfio/vfio_iommu_type1.c
fs/proc/page.c
include/linux/huge_mm.h
include/linux/mm.h
mm/ksm.c
mm/memory-failure.c
mm/page_alloc.c
mm/swap.c

index d2515435e23f2f87215558ec703f5a625ab8ac80..8fb295350efbbda8ecc23fc49707bd2152747c90 100644 (file)
@@ -905,7 +905,7 @@ bio_pageinc(struct bio *bio)
                /* Non-zero page count for non-head members of
                 * compound pages is no longer allowed by the kernel.
                 */
-               page = compound_trans_head(bv->bv_page);
+               page = compound_head(bv->bv_page);
                atomic_inc(&page->_count);
        }
 }
@@ -918,7 +918,7 @@ bio_pagedec(struct bio *bio)
        int i;
 
        bio_for_each_segment(bv, bio, i) {
-               page = compound_trans_head(bv->bv_page);
+               page = compound_head(bv->bv_page);
                atomic_dec(&page->_count);
        }
 }
index 4fb7a8f83c8a99ff8d3412a5328b2407c98409a8..54af4e93369583e5629a4bac6d1ecdbd74f885cf 100644 (file)
@@ -186,12 +186,12 @@ static bool is_invalid_reserved_pfn(unsigned long pfn)
        if (pfn_valid(pfn)) {
                bool reserved;
                struct page *tail = pfn_to_page(pfn);
-               struct page *head = compound_trans_head(tail);
+               struct page *head = compound_head(tail);
                reserved = !!(PageReserved(head));
                if (head != tail) {
                        /*
                         * "head" is not a dangling pointer
-                        * (compound_trans_head takes care of that)
+                        * (compound_head takes care of that)
                         * but the hugepage may have been split
                         * from under us (and we may not hold a
                         * reference count on the head page so it can
index b8730d9ebaee651244d0d7e46e687792b46b0c9f..2a8cc94bb641b65196b75a29c077f09253a7b727 100644 (file)
@@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page)
         * just checks PG_head/PG_tail, so we need to check PageLRU to make
         * sure a given page is a thp, not a non-huge compound page.
         */
-       else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+       else if (PageTransCompound(page) && PageLRU(compound_head(page)))
                u |= 1 << KPF_THP;
 
        /*
index 91672e2deec36cd4c986116c32b527fe5f934e19..b826239bdce0b26a614ae0802782860348dd6fc6 100644 (file)
@@ -157,23 +157,6 @@ static inline int hpage_nr_pages(struct page *page)
                return HPAGE_PMD_NR;
        return 1;
 }
-static inline struct page *compound_trans_head(struct page *page)
-{
-       if (PageTail(page)) {
-               struct page *head;
-               head = page->first_page;
-               smp_rmb();
-               /*
-                * head may be a dangling pointer.
-                * __split_huge_page_refcount clears PageTail before
-                * overwriting first_page, so if PageTail is still
-                * there it means the head pointer isn't dangling.
-                */
-               if (PageTail(page))
-                       return head;
-       }
-       return page;
-}
 
 extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                unsigned long addr, pmd_t pmd, pmd_t *pmdp);
@@ -203,7 +186,6 @@ static inline int split_huge_page(struct page *page)
        do { } while (0)
 #define split_huge_page_pmd_mm(__mm, __address, __pmd) \
        do { } while (0)
-#define compound_trans_head(page) compound_head(page)
 static inline int hugepage_madvise(struct vm_area_struct *vma,
                                   unsigned long *vm_flags, int advice)
 {
index 0ab54390a63b878d046617e897c21726b255e693..5360b82451099692409a031f2bee8d42c97d1521 100644 (file)
@@ -389,8 +389,18 @@ static inline void compound_unlock_irqrestore(struct page *page,
 
 static inline struct page *compound_head(struct page *page)
 {
-       if (unlikely(PageTail(page)))
-               return page->first_page;
+       if (unlikely(PageTail(page))) {
+               struct page *head = page->first_page;
+
+               /*
+                * page->first_page may be a dangling pointer to an old
+                * compound page, so recheck that it is still a tail
+                * page before returning.
+                */
+               smp_rmb();
+               if (likely(PageTail(page)))
+                       return head;
+       }
        return page;
 }
 
index 175fff79dc95749f6607aaa70976ebff09193397..418b8cad8b0182ae009ade738313ba8cd025226f 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -444,7 +444,7 @@ static void break_cow(struct rmap_item *rmap_item)
 static struct page *page_trans_compound_anon(struct page *page)
 {
        if (PageTransCompound(page)) {
-               struct page *head = compound_trans_head(page);
+               struct page *head = compound_head(page);
                /*
                 * head may actually be splitted and freed from under
                 * us but it's ok here.
index 90977ac8bed8598a67a17d9ddbf3e2b8b2c5101b..4566e8fa7b71c26128506fc4bd8116afa1f2e3a1 100644 (file)
@@ -1645,7 +1645,7 @@ int soft_offline_page(struct page *page, int flags)
 {
        int ret;
        unsigned long pfn = page_to_pfn(page);
-       struct page *hpage = compound_trans_head(page);
+       struct page *hpage = compound_head(page);
 
        if (PageHWPoison(page)) {
                pr_info("soft offline: %#lx page already poisoned\n", pfn);
index 56f268de3d41d4d7033fd01a998b7a90cd95fd17..589521d432d8bc04dd0809c0fc93c6afc19521cc 100644 (file)
@@ -369,9 +369,11 @@ void prep_compound_page(struct page *page, unsigned long order)
        __SetPageHead(page);
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
-               __SetPageTail(p);
                set_page_count(p, 0);
                p->first_page = page;
+               /* Make sure p->first_page is always valid for PageTail() */
+               smp_wmb();
+               __SetPageTail(p);
        }
 }
 
index 84b26aaabd03b6d236ba82b84c713844916fd521..7010cf417fdf64daadf1f6459934f2a313f3e1cb 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -84,7 +84,7 @@ static void put_compound_page(struct page *page)
 {
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
-               struct page *page_head = compound_trans_head(page);
+               struct page *page_head = compound_head(page);
 
                if (likely(page != page_head &&
                           get_page_unless_zero(page_head))) {
@@ -222,7 +222,7 @@ bool __get_page_tail(struct page *page)
         */
        unsigned long flags;
        bool got = false;
-       struct page *page_head = compound_trans_head(page);
+       struct page *page_head = compound_head(page);
 
        if (likely(page != page_head && get_page_unless_zero(page_head))) {
                /* Ref to put_compound_page() comment. */