+2005-07-29 Erez Zadok <ezk@cs.sunysb.edu>
+
+ * amd/srvr_nfs.c (find_nfs_srvr): don't blindly copy the hostent
+ IP address. First check if it differs from the existing one of
+ the fserver, and copy only if it changed. If it did change, flush
+ the fhandle cache to avoid a stale fhandle being reused. This
+ allows Amd to detect IP address changes even for an fserver that
+ lost one or more NFS pings, but not enough to be declared totally
+ down. We handle the "totally down" fserver case in
+ check_fs_addr_change().
+
+ * amd/ops_nfs.c (nfs_umount), amd/sched.c (sched_task),
+ amd/nfs_subr.c (nfs_quick_reply): code clarity.
+
+ * conf/mount/mount_linux.c (linux_nfs_error): dlog mappings of
+ errnos to NFS errors.
+
+ * conf/umount/umount_linux.c (umount2_fs): clean up code. Trying
+ stat() seems doomed to hang at times, so don't try stat() at all,
+ because umount2() appears to be clever enough to never hang.
+
2005-07-25 Erez Zadok <ezk@cs.sunysb.edu>
* amd/amd.h (FSF_FORCE_UNMOUNT): new flag used to indicate that a
* SUCH DAMAGE.
*
*
- * $Id: nfs_subr.c,v 1.33 2005/05/18 18:12:31 ezk Exp $
+ * $Id: nfs_subr.c,v 1.34 2005/07/29 10:47:19 ezk Exp $
*
*/
/*
* Free up transp. It's only used for one reply.
*/
- XFREE(transp);
- mp->am_transp = NULL;
+ XFREE(mp->am_transp);
dlog("Quick reply sent for %s", mp->am_mnt->mf_mount);
}
}
* SUCH DAMAGE.
*
*
- * $Id: ops_nfs.c,v 1.45 2005/07/26 03:31:08 ezk Exp $
+ * $Id: ops_nfs.c,v 1.46 2005/07/29 10:47:19 ezk Exp $
*
*/
static int
nfs_umount(am_node *am, mntfs *mf)
{
- int unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
- int new_unmount_flags;
- int error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
+ int unmount_flags, new_unmount_flags, error;
+
+ unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
+ error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
#if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH))
/*
gopt.flags & CFM_FORCED_UNMOUNTS &&
mf->mf_server->fs_flags & FSF_FORCE_UNMOUNT) {
plog(XLOG_INFO, "EZK: nfs_umount: trying forced/lazy unmounts");
- mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT; /* XXX: incorrect */
+ /*
+ * XXX: turning off the FSF_FORCE_UNMOUNT may not be perfectly
+ * correct. Multiple nodes may need to be timed out and restarted for
+ * a single hung fserver.
+ */
+ mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT;
new_unmount_flags = unmount_flags | AMU_UMOUNT_FORCE | AMU_UMOUNT_DETACH;
error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, new_unmount_flags);
}
* SUCH DAMAGE.
*
*
- * $Id: sched.c,v 1.17 2005/01/03 20:56:45 ezk Exp $
+ * $Id: sched.c,v 1.18 2005/07/29 10:47:19 ezk Exp $
*
*/
dlog("SLEEP on %p", wchan);
p->wchan = wchan;
p->pid = 0;
- memset((voidp) &p->w, 0, sizeof(p->w));
+ p->w = 0; /* was memset (when ->w was union) */
}
* SUCH DAMAGE.
*
*
- * $Id: srvr_nfs.c,v 1.42 2005/07/26 03:31:08 ezk Exp $
+ * $Id: srvr_nfs.c,v 1.43 2005/07/29 10:47:19 ezk Exp $
*
*/
struct hostent *hp = NULL;
struct in_addr ia;
char *old_ipaddr, *new_ipaddr;
- // nfs_private *np = (nfs_private *) fs->fs_private;
- EZKDBG;
hp = gethostbyname(fs->fs_host);
if (!hp ||
memmove((voidp) &fs->fs_ip->sin_addr,
(voidp) hp->h_addr,
sizeof(fs->fs_ip->sin_addr));
- /* XXX: are any of these correct?! */
+ /* XXX: do we need to un/set these flags? */
fs->fs_flags &= ~FSF_DOWN;
fs->fs_flags |= FSF_VALID | FSF_WANT;
map_flush_srvr(fs); /* XXX: a race with flush_srvr_nfs_cache? */
#if 0
flush_nfs_fhandle_cache(fs); /* done in caller: nfs_keepalive_timeout */
- // XXX: need to purge nfs_private so that somehow it will get re-initialized
+ /* XXX: need to purge nfs_private so that somehow it will get re-initialized? */
#endif
}
+
/*
* Called when no ping-reply received
*/
*/
flush_nfs_fhandle_cache(fs);
np->np_error = -1;
- check_fs_addr_change(fs); /* check if IP addr of fserver changed */
+ check_fs_addr_change(fs); /* check if IP addr of fserver changed */
} else {
/*
* Known to be down
* between mounts.
* Mike Mitchell, mcm@unx.sas.com, 09/08/93
*/
- if (hp && fs->fs_ip)
+ if (hp && fs->fs_ip &&
+ memcmp((voidp) &fs->fs_ip->sin_addr,
+ (voidp) hp->h_addr,
+ sizeof(fs->fs_ip->sin_addr)) != 0) {
+ struct in_addr ia;
+ char *old_ipaddr, *new_ipaddr;
+ old_ipaddr = strdup(inet_ntoa(fs->fs_ip->sin_addr));
+ memmove((voidp) &ia, (voidp) hp->h_addr, sizeof(struct in_addr));
+ new_ipaddr = inet_ntoa(ia); /* ntoa uses static buf */
+ plog(XLOG_WARNING, "fileserver %s changed ip: %s -> %s",
+ fs->fs_host, old_ipaddr, new_ipaddr);
+ XFREE(old_ipaddr);
+ flush_nfs_fhandle_cache(fs);
memmove((voidp) &fs->fs_ip->sin_addr, (voidp) hp->h_addr, sizeof(fs->fs_ip->sin_addr));
+ }
/*
* If the new file systems doesn't use WebNFS, the nfs pings may
* SUCH DAMAGE.
*
*
- * $Id: mount_linux.c,v 1.43 2005/07/09 19:41:06 ezk Exp $
+ * $Id: mount_linux.c,v 1.44 2005/07/29 10:47:19 ezk Exp $
*/
/*
int
linux_nfs_error(int e)
{
+ int ret = (nfsstat) NE_IO;
+
if (e < NFS_LOMAP || e > NFS_HIMAP)
- return (nfsstat)NE_IO;
- e = nfs_errormap[e - NFS_LOMAP];
- return (nfsstat)e;
+ ret = (nfsstat) NE_IO;
+ else
+ ret = nfs_errormap[e - NFS_LOMAP];
+ dlog("linux_nfs_error: map error %d to NFS error %d", e, ret);
+ return (nfsstat) ret;
}
* SUCH DAMAGE.
*
*
- * $Id: umount_linux.c,v 1.8 2005/07/25 23:49:41 ezk Exp $
+ * $Id: umount_linux.c,v 1.9 2005/07/29 10:47:19 ezk Exp $
*
*/
#if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH))
/*
- * Force unmount, no questions asked, without touching mnttab file.
- * The order here is relevant because we may want to try the "safer" detach
- * unmount before trying the more drastic "forced" unmount.
+ * Force unmount, no questions asked, without touching mnttab file. Try
+ * detach first because it is safer: it will remove the hung mount point
+ * without affecting hung applications. "Force" is more risky: it will cause
+ * the kernel to return EIO to applications stuck on a stat(2) of Amd.
*/
int
umount2_fs(const char *mntdir, u_int unmount_flags)
{
int error = 0;
+#ifdef MNT2_GEN_OPT_DETACH
+ if (unmount_flags & AMU_UMOUNT_DETACH) {
+ error = umount2(mntdir, MNT2_GEN_OPT_DETACH);
+ if (error < 0 && (errno == EINVAL || errno == ENOENT))
+ error = 0; /* ignore EINVAL/ENOENT */
+ if (error < 0) { /* don't try FORCE if detach succeeded */
+ plog(XLOG_WARNING, "%s: unmount/detach: %m", mntdir);
+ /* fall through to try "force" (if flag specified) */
+ } else {
+ dlog("%s: unmount/detach: OK", mntdir);
+ return error;
+ }
+ }
+#endif /* MNT2_GEN_OPT_DETACH */
+
#ifdef MNT2_GEN_OPT_FORCE
if (unmount_flags & AMU_UMOUNT_FORCE) {
plog(XLOG_INFO, "umount2_fs: trying unmount/forced on %s", mntdir);
error = 0; /* ignore EINVAL/ENOENT */
if (error < 0)
plog(XLOG_WARNING, "%s: unmount/force: %m", mntdir);
- else {
+ else
dlog("%s: unmount/force: OK", mntdir);
- goto out;
- }
+ /* fall through to return whatever error we got (if any) */
}
#endif /* MNT2_GEN_OPT_FORCE */
-#ifdef MNT2_GEN_OPT_DETACH
- /*
- * XXX: the stat() below may hang this unmount attempt of a toplvl
- * mount. In that case, you may have to kill -9 the Amd process. A
- * better way to handle this would be to check mtab for an old amd
- * process, send a kill -0 to it to see if the Amd process is alive, and
- * only do the forced unmount if the older Amd process died.
- */
- if (unmount_flags & AMU_UMOUNT_DETACH) {
- /*
- * If I got an EBUSY from the above FORCE, then don't try to stat(), or
- * it will hang.
- */
- if (error < 0 && errno == EBUSY) {
- error = 0;
- } else {
- struct stat dummy;
- dlog("umount_fs: try stat() before unmount/detach");
- error = stat(mntdir, &dummy);
- }
- if (!error || (errno == ESTALE || errno == EIO)) {
- if (error < 0)
- plog(XLOG_INFO, "unmount2_fs: trying unmount/detach of %s (%m)",
- mntdir);
- else
- plog(XLOG_INFO, "unmount2_fs: trying unmount/detach of %s",
- mntdir);
- error = umount2(mntdir, MNT2_GEN_OPT_DETACH);
- if (error < 0 && (errno == EINVAL || errno == ENOENT))
- error = 0; /* ignore EINVAL/ENOENT */
- if (error < 0) /* don't try FORCE if detach succeeded */
- plog(XLOG_WARNING, "%s: unmount/detach: %m", mntdir);
- else {
- dlog("%s: unmount/detach: OK", mntdir);
- goto out; /* superfluous (but symmetric code :-) */
- }
- }
- }
-#endif /* MNT2_GEN_OPT_DETACH */
-
-#ifdef MNT2_GEN_OPT_DETACH
- out:
-#endif /* MNT2_GEN_OPT_DETACH */
return error;
}
#endif /* HAVE_UMOUNT2 && (MNT2_GEN_OPT_FORCE || MNT2_GEN_OPT_DETACH) */