ceph: check availability of mds cluster on mount after wait timeout
author: Xiubo Li <xiubli@redhat.com>
Wed, 11 Dec 2019 01:29:40 +0000 (20:29 -0500)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 28 Feb 2020 15:36:07 +0000 (16:36 +0100)
[ Upstream commit 97820058fb2831a4b203981fa2566ceaaa396103 ]

If all the MDS daemons are down for some reason, then the first mount
attempt will fail with EIO after the mount request times out.  A mount
attempt will also fail with EIO if all of the MDS's are laggy.

This patch changes the code to return -EHOSTUNREACH in these situations
and adds a pr_info error message to help the admin determine the cause.

URL: https://tracker.ceph.com/issues/4386
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/ceph/mds_client.c
fs/ceph/super.c

index b968334f841e81df35e2fe117035e5a6fbbc467a..f36ddfea4997e826307a368c54203adad11e0bb1 100644 (file)
@@ -2261,8 +2261,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
                if (!(mdsc->fsc->mount_options->flags &
                      CEPH_MOUNT_OPT_MOUNTWAIT) &&
                    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
-                       err = -ENOENT;
-                       pr_info("probably no mds server is up\n");
+                       err = -EHOSTUNREACH;
                        goto finish;
                }
        }
index 088c4488b4492cf705ba13f35a7196e8c80bfb0a..6b10b20bfe32bc88243647e1d2dd414a8b6ae52c 100644 (file)
@@ -1055,6 +1055,11 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
        return res;
 
 out_splat:
+       if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
+               pr_info("No mds server is up or the cluster is laggy\n");
+               err = -EHOSTUNREACH;
+       }
+
        ceph_mdsc_close_sessions(fsc->mdsc);
        deactivate_locked_super(sb);
        goto out_final;