2005-07-25 Erez Zadok <ezk@cs.sunysb.edu>
+ * amd/amd.h (FSF_FORCE_UNMOUNT): new flag used to indicate that a
+ particular fserver may need forced/lazy unmounts when its mntfs's
+ are unmounted.
+
+ * amd/ops_nfs.c (nfs_umount): if a simple unmount returned EBUSY,
+ the user specified forced_unmounts=yes in amd.conf, this fserver
+ was flagged with FSF_FORCE_UNMOUNT, and the OS supports
+ forced/lazy unmounts, then try forced/lazy unmounts. This should
+ allow a hung mount point to be removed.
+
+ * amd/srvr_nfs.c (find_nfs_srvr): move the IP-address change
+ detection code out into its own function.
+ (check_fs_addr_change): new function to detect if the IP address
+ of a downed host has changed, and do various cleanups and fixups
+ to try and recover as well as possible from that situation (e.g.,
+ flushing various caches). Also set the FSF_FORCE_UNMOUNT flag for
+ the fserver in question.
+ (flush_srvr_nfs_cache): pass fserver as argument, so we can
+ selectively flush the NFS cache for a single fserver (or all of
+ them, if you pass NULL).
+
* libamu/xutil.c (switch_to_logfile): truncate a regular-file log
file if user passed non-zero "truncate_log" flag.
* SUCH DAMAGE.
*
*
- * $Id: amd.h,v 1.68 2005/07/26 01:48:13 ezk Exp $
+ * $Id: amd.h,v 1.69 2005/07/26 03:31:08 ezk Exp $
*
*/
#define FSF_PINGING 0x0010 /* Already doing pings */
#define FSF_WEBNFS 0x0020 /* Don't try to contact portmapper */
#define FSF_PING_UNINIT 0x0040 /* ping values have not been initilized */
+#define FSF_FORCE_UNMOUNT 0x0080 /* force umount of this fserver */
#define FSRV_ERROR(fs) ((fs) && (((fs)->fs_flags & FSF_ERROR) == FSF_ERROR))
#define FSRV_ISDOWN(fs) ((fs) && (((fs)->fs_flags & (FSF_DOWN|FSF_VALID)) == (FSF_DOWN|FSF_VALID)))
#define FSRV_ISUP(fs) (!(fs) || (((fs)->fs_flags & (FSF_DOWN|FSF_VALID)) == (FSF_VALID)))
extern int amfs_mount(am_node *mp, mntfs *mf, char *opts);
extern void assign_error_mntfs(am_node *mp);
extern am_node *next_nonerror_node(am_node *xp);
-extern void flush_srvr_nfs_cache(void);
+extern void flush_srvr_nfs_cache(fserver *fs);
extern void am_mounted(am_node *);
extern void mf_mounted(mntfs *mf, bool_t call_free_opts);
extern void am_unmounted(am_node *);
* SUCH DAMAGE.
*
*
- * $Id: amq_subr.c,v 1.20 2005/07/26 01:48:13 ezk Exp $
+ * $Id: amq_subr.c,v 1.21 2005/07/26 03:31:08 ezk Exp $
*
*/
/*
if (amd_state == Run) {
plog(XLOG_INFO, "amq says flush cache");
do_mapc_reload = 0;
- flush_nfs_fhandle_cache((fserver *) 0);
- flush_srvr_nfs_cache();
+ flush_nfs_fhandle_cache((fserver *) NULL);
+ flush_srvr_nfs_cache((fserver *) NULL);
}
break;
}
* SUCH DAMAGE.
*
*
- * $Id: ops_nfs.c,v 1.44 2005/07/20 03:32:30 ezk Exp $
+ * $Id: ops_nfs.c,v 1.45 2005/07/26 03:31:08 ezk Exp $
*
*/
fh_cache *fp;
ITER(fp, fh_cache, &fh_head) {
- if (fp->fh_fs == fs || fs == 0) {
+ if (fp->fh_fs == fs || fs == NULL) {
/*
* Only invalidate port info for non-WebNFS servers
*/
nfs_umount(am_node *am, mntfs *mf)
{
int unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
+ int new_unmount_flags;
int error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
+#if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH))
+ /*
+ * If the attempt to unmount failed with EBUSY, and this fserver was
+ * marked for forced unmounts, then use forced/lazy unmounts.
+ */
+ if (error == EBUSY &&
+ gopt.flags & CFM_FORCED_UNMOUNTS &&
+ mf->mf_server->fs_flags & FSF_FORCE_UNMOUNT) {
+ plog(XLOG_INFO, "EZK: nfs_umount: trying forced/lazy unmounts");
+ mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT; /* XXX: incorrect */
+ new_unmount_flags = unmount_flags | AMU_UMOUNT_FORCE | AMU_UMOUNT_DETACH;
+ error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, new_unmount_flags);
+ }
+#endif /* HAVE_UMOUNT2 && (MNT2_GEN_OPT_FORCE || MNT2_GEN_OPT_DETACH) */
+
/*
* Here is some code to unmount 'restarted' file systems.
* The restarted file systems are marked as 'nfs', not
if (NSTREQ(mf->mf_mount, new_mf->mf_mount, len) &&
new_mf->mf_mount[len] == '/') {
- int new_unmount_flags =
+ new_unmount_flags =
(new_mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
UMOUNT_FS(new_mf->mf_mount, mnttab_file_name, new_unmount_flags);
didsome = 1;
* SUCH DAMAGE.
*
*
- * $Id: srvr_nfs.c,v 1.41 2005/07/11 01:55:28 ezk Exp $
+ * $Id: srvr_nfs.c,v 1.42 2005/07/26 03:31:08 ezk Exp $
*
*/
/*
- * Flush any cached data
+ * Flush cached data for an fserver (or for all, if fs==NULL)
+ *
+ * Walks nfs_srvr_list and, for every entry selected by "fs" that has
+ * private NFS data attached, sets np_mountd_inval to TRUE and np_error
+ * to -1.  NOTE(review): presumably this forces the mountd information
+ * to be looked up again on next use -- confirm against the users of
+ * np_mountd_inval.
*/
void
-flush_srvr_nfs_cache(void)
+flush_srvr_nfs_cache(fserver *fs)
{
- fserver *fs = 0;
+ fserver *fs2 = NULL; /* list cursor; "fs" is now the caller's filter */
- ITER(fs, fserver, &nfs_srvr_list) {
- nfs_private *np = (nfs_private *) fs->fs_private;
- if (np) {
- np->np_mountd_inval = TRUE;
- np->np_error = -1;
+ ITER(fs2, fserver, &nfs_srvr_list) {
+ if (fs == NULL || fs == fs2) { /* NULL selects every server on the list */
+ nfs_private *np = (nfs_private *) fs2->fs_private;
+ if (np) { /* entries without private data are left alone */
+ np->np_mountd_inval = TRUE;
+ np->np_error = -1;
+ }
}
}
}
}
+static void
+check_fs_addr_change(fserver *fs)
+{
+ struct hostent *hp = NULL;
+ struct in_addr ia;
+ char *old_ipaddr, *new_ipaddr;
+ /*
+ * Purpose: re-resolve fs->fs_host with gethostbyname() and compare the
+ * answer with the address stored in fs->fs_ip.  If the host now maps to
+ * a different IPv4 address, log the change, store the new address in
+ * fs->fs_ip, clear FSF_DOWN, set FSF_VALID|FSF_WANT, call
+ * map_flush_srvr() and flush_srvr_nfs_cache() for this fserver, and
+ * finally set FSF_FORCE_UNMOUNT.  Returns nothing; when the address
+ * has not changed (or cannot be determined) fs is left untouched.
+ */
+ EZKDBG;
+
+ hp = gethostbyname(fs->fs_host);
+ if (!hp ||
+ hp->h_addrtype != AF_INET ||
+ !STREQ((char *) hp->h_name, fs->fs_host) ||
+ memcmp((voidp) &fs->fs_ip->sin_addr,
+ (voidp) hp->h_addr,
+ sizeof(fs->fs_ip->sin_addr)) == 0)
+ return; /* lookup failed, not AF_INET, name differs from fs_host, or same address */
+ /* if got here: downed server changed IP address */
+ old_ipaddr = strdup(inet_ntoa(fs->fs_ip->sin_addr)); /* private copy: see "static buf" note below; NOTE(review): result is not checked for NULL */
+ memmove((voidp) &ia, (voidp) hp->h_addr, sizeof(struct in_addr));
+ new_ipaddr = inet_ntoa(ia); /* ntoa uses static buf */
+ plog(XLOG_WARNING, "EZK: down fileserver %s changed ip: %s -> %s",
+ fs->fs_host, old_ipaddr, new_ipaddr);
+ XFREE(old_ipaddr); /* only needed for the log message above */
+ /* copy new IP addr */
+ memmove((voidp) &fs->fs_ip->sin_addr,
+ (voidp) hp->h_addr,
+ sizeof(fs->fs_ip->sin_addr));
+ /* XXX: are any of these correct?! */
+ fs->fs_flags &= ~FSF_DOWN;
+ fs->fs_flags |= FSF_VALID | FSF_WANT;
+ map_flush_srvr(fs); /* XXX: a race with flush_srvr_nfs_cache? */
+ flush_srvr_nfs_cache(fs); /* non-NULL argument: only this fserver's entry is touched */
+ fs->fs_flags |= FSF_FORCE_UNMOUNT; /* tested by nfs_umount() before retrying an EBUSY unmount with force/detach */
+
+#if 0
+ flush_nfs_fhandle_cache(fs); /* done in caller: nfs_keepalive_timeout */
+ /* XXX: need to purge nfs_private so that somehow it will get re-initialized */
+#endif
+}
+
/*
* Called when no ping-reply received
*/
*/
flush_nfs_fhandle_cache(fs);
np->np_error = -1;
+ check_fs_addr_change(fs); /* check if IP addr of fserver changed */
} else {
/*
* Known to be down
/*
* This may not be the best way to do things, but it really doesn't make
* sense to query a file server which is marked as 'down' for any
- * version/proto combination: so just return that 'downed' server if it
- * matched. We also check here if by any chance, the IP address of the
- * server was changed; this happens when NFS servers are migrated, or a
- * temporary server is made available for one that failed.
+ * version/proto combination.
*/
ITER(fs, fserver, &nfs_srvr_list) {
- if (!FSRV_ISDOWN(fs) || !STREQ(host, fs->fs_host))
- continue;
- if (memcmp((voidp) &fs->fs_ip->sin_addr,
- (voidp) &ip->sin_addr,
- sizeof(ip->sin_addr)) != 0) {
- /* IP address of downed server has changed! */
- char *old_ipaddr = strdup(inet_ntoa(fs->fs_ip->sin_addr));
- char *new_ipaddr = inet_ntoa(ip->sin_addr); /* ntoa uses static buf */
- plog(XLOG_WARNING, "down fileserver %s changed ip: %s -> %s",
- host, old_ipaddr, new_ipaddr);
- XFREE(old_ipaddr);
- /* Now fix the fserver to the new IP */
- dlog("resetting fileserver %s to ip %s (flags: valid, not down)",
- host, new_ipaddr);
- memmove((voidp) &fs->fs_ip->sin_addr,
- (voidp) &ip->sin_addr,
- sizeof(ip->sin_addr));
- fs->fs_flags |= FSF_VALID;
- fs->fs_flags &= ~(FSF_DOWN|FSF_ERROR);
- flush_nfs_fhandle_cache(fs); /* XXX: safer, but really needed? */
- /* fall through to checking available NFS protocols, pinging, etc. */
- } else {
- /* server was down and is still down. Not much we can do. */
+ if (FSRV_ISDOWN(fs) &&
+ STREQ(host, fs->fs_host)) {
plog(XLOG_WARNING, "fileserver %s is already hung - not running NFS proto/version discovery", host);
fs->fs_refc++;
if (ip)