* amd/srvr_nfs.c (find_nfs_srvr): don't blindly copy the hostent

author Erez Zadok <ezk@cs.sunysb.edu>

Fri, 29 Jul 2005 10:47:19 +0000 (10:47 +0000)

committer Erez Zadok <ezk@cs.sunysb.edu>

Fri, 29 Jul 2005 10:47:19 +0000 (10:47 +0000)
author Erez Zadok <ezk@cs.sunysb.edu>
Fri, 29 Jul 2005 10:47:19 +0000 (10:47 +0000)
committer Erez Zadok <ezk@cs.sunysb.edu>
Fri, 29 Jul 2005 10:47:19 +0000 (10:47 +0000)
diff --git a/ChangeLog b/ChangeLog

index 3d5c6812a7a68e1a21c33e0188417ac57065a37f..2e893e75f25d22b7b9315b2ef0a8f7069caab049 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2005-07-29  Erez Zadok  <ezk@cs.sunysb.edu>
+
+       * amd/srvr_nfs.c (find_nfs_srvr): don't blindly copy the hostent
+       IP address.  First check if it differs from the existing one of
+       the fserver, and copy only if it changed.  If it did change, flush
+       the fhandle cache to avoid a stale fhandle being reused.  This
+       allows Amd to detect IP address changes even for an fserver that
+       lost one or more NFS pings, but not enough to be declared totally
+       down.  We handle the "totally down" fserver case in
+       check_fs_addr_change().
+
+       * amd/ops_nfs.c (nfs_umount), amd/sched.c (sched_task),
+       amd/nfs_subr.c (nfs_quick_reply): code clarity.
+
+       * conf/mount/mount_linux.c (linux_nfs_error): dlog mappings of
+       errnos to NFS errors.
+
+       * conf/umount/umount_linux.c (umount2_fs): cleanup code.  Trying
+       stat() seems doomed to hang at times, so don't try stat at all,
+       because umount2() appears to be clever enough to never hang.
+
  2005-07-25  Erez Zadok  <ezk@cs.sunysb.edu>
  
         * amd/amd.h (FSF_FORCE_UNMOUNT): new flag used to indicate that a
diff --git a/amd/nfs_subr.c b/amd/nfs_subr.c

index 6f5502bd9abdf17ed30d6673b15f4ccaf3ff213f..fb60ab046c8c59df17ed43cd55c3408d9fc756d5 100644 (file)
--- a/amd/nfs_subr.c
+++ b/amd/nfs_subr.c
@@ -37,7 +37,7 @@
   * SUCH DAMAGE.
   *
   *
- * $Id: nfs_subr.c,v 1.33 2005/05/18 18:12:31 ezk Exp $
+ * $Id: nfs_subr.c,v 1.34 2005/07/29 10:47:19 ezk Exp $
   *
   */
  
@@ -283,8 +283,7 @@ nfs_quick_reply(am_node *mp, int error)
      /*
       * Free up transp.  It's only used for one reply.
       */
-    XFREE(transp);
-    mp->am_transp = NULL;
+    XFREE(mp->am_transp);
      dlog("Quick reply sent for %s", mp->am_mnt->mf_mount);
    }
  }
diff --git a/amd/ops_nfs.c b/amd/ops_nfs.c

index e81e3af2611e26667889e0bc7917d70a064a43a2..c688e21609915e4c4218e470f9d40cdd07ff09b4 100644 (file)
--- a/amd/ops_nfs.c
+++ b/amd/ops_nfs.c
@@ -37,7 +37,7 @@
   * SUCH DAMAGE.
   *
   *
- * $Id: ops_nfs.c,v 1.45 2005/07/26 03:31:08 ezk Exp $
+ * $Id: ops_nfs.c,v 1.46 2005/07/29 10:47:19 ezk Exp $
   *
   */
  
@@ -925,9 +925,10 @@ nfs_mount(am_node *am, mntfs *mf)
  static int
  nfs_umount(am_node *am, mntfs *mf)
  {
-  int unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
-  int new_unmount_flags;
-  int error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
+  int unmount_flags, new_unmount_flags, error;
+
+  unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
+  error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
  
  #if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH))
    /*
@@ -938,7 +939,12 @@ nfs_umount(am_node *am, mntfs *mf)
        gopt.flags & CFM_FORCED_UNMOUNTS &&
        mf->mf_server->fs_flags & FSF_FORCE_UNMOUNT) {
      plog(XLOG_INFO, "EZK: nfs_umount: trying forced/lazy unmounts");
-    mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT; /* XXX: incorrect */
+    /*
+     * XXX: turning off the FSF_FORCE_UNMOUNT may not be perfectly
+     * correct.  Multiple nodes may need to be timed out and restarted for
+     * a single hung fserver.
+     */
+    mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT;
      new_unmount_flags = unmount_flags | AMU_UMOUNT_FORCE | AMU_UMOUNT_DETACH;
      error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, new_unmount_flags);
    }
diff --git a/amd/sched.c b/amd/sched.c

index b0ac928d3bbdc81263434d75ced73cfc8d469277..7e42f716af1381cd40f5335b1a2211dd67afe4e9 100644 (file)
--- a/amd/sched.c
+++ b/amd/sched.c
@@ -37,7 +37,7 @@
   * SUCH DAMAGE.
   *
   *
- * $Id: sched.c,v 1.17 2005/01/03 20:56:45 ezk Exp $
+ * $Id: sched.c,v 1.18 2005/07/29 10:47:19 ezk Exp $
   *
   */
  
@@ -164,7 +164,7 @@ sched_task(cb_fun *cf, opaque_t ca, wchan_t wchan)
    dlog("SLEEP on %p", wchan);
    p->wchan = wchan;
    p->pid = 0;
-  memset((voidp) &p->w, 0, sizeof(p->w));
+  p->w = 0;                    /* was memset (when ->w was union) */
  }
  
  
diff --git a/amd/srvr_nfs.c b/amd/srvr_nfs.c

index 9b5f95ceb0aecdf3f7941f39475dfbb2a8e2f33c..f9d683ad3e4a8cd06279b6e9eafd82272eafd996 100644 (file)
--- a/amd/srvr_nfs.c
+++ b/amd/srvr_nfs.c
@@ -37,7 +37,7 @@
   * SUCH DAMAGE.
   *
   *
- * $Id: srvr_nfs.c,v 1.42 2005/07/26 03:31:08 ezk Exp $
+ * $Id: srvr_nfs.c,v 1.43 2005/07/29 10:47:19 ezk Exp $
   *
   */
  
@@ -444,8 +444,6 @@ check_fs_addr_change(fserver *fs)
    struct hostent *hp = NULL;
    struct in_addr ia;
    char *old_ipaddr, *new_ipaddr;
-  //  nfs_private *np = (nfs_private *) fs->fs_private;
-  EZKDBG;
  
    hp = gethostbyname(fs->fs_host);
    if (!hp ||
@@ -466,7 +464,7 @@ check_fs_addr_change(fserver *fs)
    memmove((voidp) &fs->fs_ip->sin_addr,
           (voidp) hp->h_addr,
           sizeof(fs->fs_ip->sin_addr));
-  /* XXX: are any of these correct?! */
+  /* XXX: do we need to un/set these flags? */
    fs->fs_flags &= ~FSF_DOWN;
    fs->fs_flags |= FSF_VALID | FSF_WANT;
    map_flush_srvr(fs);          /* XXX: a race with flush_srvr_nfs_cache? */
@@ -475,10 +473,11 @@ check_fs_addr_change(fserver *fs)
  
  #if 0
    flush_nfs_fhandle_cache(fs); /* done in caller: nfs_keepalive_timeout */
-  // XXX: need to purge nfs_private so that somehow it will get re-initialized
+  /* XXX: need to purge nfs_private so that somehow it will get re-initialized? */
  #endif
  }
  
+
  /*
   * Called when no ping-reply received
   */
@@ -520,7 +519,7 @@ nfs_keepalive_timeout(voidp v)
         */
        flush_nfs_fhandle_cache(fs);
        np->np_error = -1;
-      check_fs_addr_change(fs);        /* check if IP addr of fserver changed */
+      check_fs_addr_change(fs); /* check if IP addr of fserver changed */
      } else {
        /*
         * Known to be down
@@ -929,8 +928,21 @@ no_dns:
         * between mounts.
         * Mike Mitchell, mcm@unx.sas.com, 09/08/93
         */
-      if (hp && fs->fs_ip)
+      if (hp && fs->fs_ip &&
+         memcmp((voidp) &fs->fs_ip->sin_addr,
+                (voidp) hp->h_addr,
+                sizeof(fs->fs_ip->sin_addr)) != 0) {
+       struct in_addr ia;
+       char *old_ipaddr, *new_ipaddr;
+       old_ipaddr = strdup(inet_ntoa(fs->fs_ip->sin_addr));
+       memmove((voidp) &ia, (voidp) hp->h_addr, sizeof(struct in_addr));
+       new_ipaddr = inet_ntoa(ia);     /* ntoa uses static buf */
+       plog(XLOG_WARNING, "fileserver %s changed ip: %s -> %s",
+            fs->fs_host, old_ipaddr, new_ipaddr);
+       XFREE(old_ipaddr);
+       flush_nfs_fhandle_cache(fs);
         memmove((voidp) &fs->fs_ip->sin_addr, (voidp) hp->h_addr, sizeof(fs->fs_ip->sin_addr));
+      }
  
        /*
         * If the new file systems doesn't use WebNFS, the nfs pings may
diff --git a/conf/mount/mount_linux.c b/conf/mount/mount_linux.c

index 61c0c2316590abf9a64f7d098c983f27ca2eba47..cf8572332499e1d16c68770537c813e20f591582 100644 (file)
--- a/conf/mount/mount_linux.c
+++ b/conf/mount/mount_linux.c
@@ -37,7 +37,7 @@
   * SUCH DAMAGE.
   *
   *
- * $Id: mount_linux.c,v 1.43 2005/07/09 19:41:06 ezk Exp $
+ * $Id: mount_linux.c,v 1.44 2005/07/29 10:47:19 ezk Exp $
   */
  
  /*
@@ -671,10 +671,14 @@ static int nfs_errormap[] = {
  int
  linux_nfs_error(int e)
  {
+  int ret = (nfsstat) NE_IO;
+
    if (e < NFS_LOMAP || e > NFS_HIMAP)
-    return (nfsstat)NE_IO;
-  e = nfs_errormap[e - NFS_LOMAP];
-  return (nfsstat)e;
+    ret = (nfsstat) NE_IO;
+  else
+    ret = nfs_errormap[e - NFS_LOMAP];
+  dlog("linux_nfs_error: map error %d to NFS error %d", e, ret);
+  return (nfsstat) ret;
  }
  
  
diff --git a/conf/umount/umount_linux.c b/conf/umount/umount_linux.c

index 6ab85df185e5c09507db61d667a9678ff11365ff..b02666a34e738e77688993f577a55dc18b39d881 100644 (file)
--- a/conf/umount/umount_linux.c
+++ b/conf/umount/umount_linux.c
@@ -37,7 +37,7 @@
   * SUCH DAMAGE.
   *
   *
- * $Id: umount_linux.c,v 1.8 2005/07/25 23:49:41 ezk Exp $
+ * $Id: umount_linux.c,v 1.9 2005/07/29 10:47:19 ezk Exp $
   *
   */
  
@@ -213,15 +213,31 @@ umount_fs(char *mntdir, const char *mnttabname, u_int unmount_flags)
  
  #if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH))
  /*
- * Force unmount, no questions asked, without touching mnttab file.
- * The order here is relevant because we may want to try the "safer" detach
- * unmount before trying the more drastic "forced" unmount.
+ * Force unmount, no questions asked, without touching mnttab file.  Try
+ * detach first because it is safer: will remove the hung mnt point without
+ * affecting hung applications.  "Force" is more risky: it will cause the
+ * kernel to return EIO to applications stuck on a stat(2) of Amd.
   */
  int
  umount2_fs(const char *mntdir, u_int unmount_flags)
  {
    int error = 0;
  
+#ifdef MNT2_GEN_OPT_DETACH
+  if (unmount_flags & AMU_UMOUNT_DETACH) {
+    error = umount2(mntdir, MNT2_GEN_OPT_DETACH);
+    if (error < 0 && (errno == EINVAL || errno == ENOENT))
+      error = 0;               /* ignore EINVAL/ENOENT */
+    if (error < 0) {           /* don't try FORCE if detach succeeded */
+      plog(XLOG_WARNING, "%s: unmount/detach: %m", mntdir);
+      /* fall through to try "force" (if flag specified) */
+    } else {
+      dlog("%s: unmount/detach: OK", mntdir);
+      return error;
+    }
+  }
+#endif /* MNT2_GEN_OPT_DETACH */
+
  #ifdef MNT2_GEN_OPT_FORCE
    if (unmount_flags & AMU_UMOUNT_FORCE) {
      plog(XLOG_INFO, "umount2_fs: trying unmount/forced on %s", mntdir);
@@ -230,56 +246,12 @@ umount2_fs(const char *mntdir, u_int unmount_flags)
        error = 0;               /* ignore EINVAL/ENOENT */
      if (error < 0)
        plog(XLOG_WARNING, "%s: unmount/force: %m", mntdir);
-    else {
+    else
        dlog("%s: unmount/force: OK", mntdir);
-      goto out;
-    }
+    /* fall through to return whatever error we got (if any) */
    }
  #endif /* MNT2_GEN_OPT_FORCE */
  
-#ifdef MNT2_GEN_OPT_DETACH
-  /*
-   * XXX: the stat() below may hang this unmount attempt of a toplvl
-   * mount.  In that case, you may have to kill -9 the Amd process.  A
-   * better way to handle this would be to check mtab for an old amd
-   * process, send a kill -0 to it to see if the Amd process is alive, and
-   * only do the forced unmount if the older Amd process died.
-   */
-  if (unmount_flags & AMU_UMOUNT_DETACH) {
-    /*
-     * If I got an EBUSY from the above FORCE, then don't try to stat(), or
-     * it will hang.
-     */
-    if (error < 0 && errno == EBUSY) {
-      error = 0;
-    } else {
-      struct stat dummy;
-      dlog("umount_fs: try stat() before unmount/detach");
-      error = stat(mntdir, &dummy);
-    }
-    if (!error || (errno == ESTALE || errno == EIO)) {
-      if (error < 0)
-       plog(XLOG_INFO, "unmount2_fs: trying unmount/detach of %s (%m)",
-            mntdir);
-      else
-       plog(XLOG_INFO, "unmount2_fs: trying unmount/detach of %s",
-            mntdir);
-      error = umount2(mntdir, MNT2_GEN_OPT_DETACH);
-      if (error < 0 && (errno == EINVAL || errno == ENOENT))
-       error = 0;              /* ignore EINVAL/ENOENT */
-      if (error < 0)           /* don't try FORCE if detach succeeded */
-       plog(XLOG_WARNING, "%s: unmount/detach: %m", mntdir);
-      else {
-       dlog("%s: unmount/detach: OK", mntdir);
-       goto out;               /* superfluous (but symmetric code :-) */
-      }
-    }
-  }
-#endif /* MNT2_GEN_OPT_DETACH */
-
-#ifdef MNT2_GEN_OPT_DETACH
- out:
-#endif /* MNT2_GEN_OPT_DETACH */
    return error;
  }
  #endif /* HAVE_UMOUNT2 && (MNT2_GEN_OPT_FORCE || MNT2_GEN_OPT_DETACH) */
author	Erez Zadok <ezk@cs.sunysb.edu>
	Fri, 29 Jul 2005 10:47:19 +0000 (10:47 +0000)
committer	Erez Zadok <ezk@cs.sunysb.edu>
	Fri, 29 Jul 2005 10:47:19 +0000 (10:47 +0000)
ChangeLog		patch \| blob \| history
amd/nfs_subr.c		patch \| blob \| history
amd/ops_nfs.c		patch \| blob \| history
amd/sched.c		patch \| blob \| history
amd/srvr_nfs.c		patch \| blob \| history
conf/mount/mount_linux.c		patch \| blob \| history
conf/umount/umount_linux.c		patch \| blob \| history