net: memcg: late association of sock to memcg
author: Shakeel Butt <shakeelb@google.com>
Tue, 10 Mar 2020 05:16:06 +0000 (22:16 -0700)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Mar 2020 09:54:09 +0000 (10:54 +0100)
[ Upstream commit d752a4986532cb6305dfd5290a614cde8072769d ]

If a TCP socket is allocated in IRQ context, or cloned from an unassociated
(i.e. not associated with a memcg) socket in IRQ context, then it will remain
unassociated for its whole life. Almost half of the TCP sockets created on the
system are created in IRQ context, so memory used by such sockets will
not be accounted for by the memcg.

This issue is more widespread in cgroup v1, where network memory
accounting is opt-in, but it can also happen in cgroup v2 if the source
socket for the cloning was created in the root memcg.

To fix the issue, just do the association of the sockets at the accept()
time in the process context and then force charge the memory buffer
already used and reserved by the socket.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
mm/memcontrol.c
net/core/sock.c
net/ipv4/inet_connection_sock.c

index 81400be03dcb757b7415f0234d6937b281039de1..5d6ebd1449f07b1e16e65ddf3cc10b34d56a733b 100644 (file)
@@ -5867,20 +5867,6 @@ void mem_cgroup_sk_alloc(struct sock *sk)
        if (!mem_cgroup_sockets_enabled)
                return;
 
-       /*
-        * Socket cloning can throw us here with sk_memcg already
-        * filled. It won't however, necessarily happen from
-        * process context. So the test for root memcg given
-        * the current task's memcg won't help us in this case.
-        *
-        * Respecting the original socket's memcg is a better
-        * decision in this case.
-        */
-       if (sk->sk_memcg) {
-               css_get(&sk->sk_memcg->css);
-               return;
-       }
-
        /* Do not associate the sock with unrelated interrupted task's memcg. */
        if (in_interrupt())
                return;
index 03ca2f638eb4ab8f01b41edd8657623764fe9af0..d2cb2051d04534095bf934a47fd04175ce6ca6b6 100644 (file)
@@ -1684,7 +1684,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                atomic_set(&newsk->sk_zckey, 0);
 
                sock_reset_flag(newsk, SOCK_DONE);
-               mem_cgroup_sk_alloc(newsk);
+
+               /* sk->sk_memcg will be populated at accept() time */
+               newsk->sk_memcg = NULL;
+
                cgroup_sk_alloc(&newsk->sk_cgrp_data);
 
                rcu_read_lock();
index da55ce62fe50b4772d5d4a02604f8ad44fdf30de..c786f81952f1e4c1caede1eb110bafa9b2e23a5b 100644 (file)
@@ -475,6 +475,26 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
                }
                spin_unlock_bh(&queue->fastopenq.lock);
        }
+
+       if (mem_cgroup_sockets_enabled) {
+               int amt;
+
+               /* atomically get the memory usage, set and charge the
+                * sk->sk_memcg.
+                */
+               lock_sock(newsk);
+
+               /* The sk has not been accepted yet, no need to look at
+                * sk->sk_wmem_queued.
+                */
+               amt = sk_mem_pages(newsk->sk_forward_alloc +
+                                  atomic_read(&sk->sk_rmem_alloc));
+               mem_cgroup_sk_alloc(newsk);
+               if (newsk->sk_memcg && amt)
+                       mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+
+               release_sock(newsk);
+       }
 out:
        release_sock(sk);
        if (req)