From: Erez Zadok Date: Tue, 26 Jul 2005 03:31:08 +0000 (+0000) Subject: * amd/amd.h (FSF_FORCE_UNMOUNT): new flag used to indicate that a X-Git-Tag: am-utils-6_1_1~11 X-Git-Url: https://git.fsl.cs.sunysb.edu/?a=commitdiff_plain;h=8d5b03fe3c4a7cea709d5442dcc6c871bccad0aa;p=am-utils-6.1.git * amd/amd.h (FSF_FORCE_UNMOUNT): new flag used to indicate that a particular fserver may need forced/lazy unmounts when its mntfs's are unmounted. * amd/ops_nfs.c (nfs_umount): if a simple unmount returned EBUSY, and the user specified forced_unmounts=yes in amd.conf, and this fserver was flagged with FSF_FORCE_UNMOUNT, and the OS supports forced/lazy unmounts, then try forced/lazy unmounts. This should allow a hung mount point to be removed. * amd/srvr_nfs.c (find_nfs_srvr): move away IP-address change detection code to its own function. (check_fs_addr_change): new function to detect if the IP address of a downed host has changed, and do various cleanups and fixups to try and recover as best from that situation (e.g., flushing various caches). Also set the FSF_FORCE_UNMOUNT flag for the fserver in question. (flush_srvr_nfs_cache): pass fserver as argument, so we can selectively flush the NFS cache for a single fserver (or all of them, if you pass NULL). --- diff --git a/ChangeLog b/ChangeLog index 9b13be9..3d5c681 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,26 @@ 2005-07-25 Erez Zadok + * amd/amd.h (FSF_FORCE_UNMOUNT): new flag used to indicate that a + particular fserver may need forced/lazy unmounts when its mntfs's + are unmounted. + + * amd/ops_nfs.c (nfs_umount): if a simple unmount returned EBUSY, and + the user specified forced_unmounts=yes in amd.conf, and this + fserver was flagged with FSF_FORCE_UNMOUNT, and the OS supports + forced/lazy unmounts, then try forced/lazy unmounts. This should + allow a hung mount point to be removed. + + * amd/srvr_nfs.c (find_nfs_srvr): move away IP-address change + detection code to its own function. 
+ (check_fs_addr_change): new function to detect if the IP address + of a downed host has changed, and do various cleanups and fixups + to try and recover as best from that situation (e.g., flushing + various caches). Also set the FSF_FORCE_UNMOUNT flag for the + fserver in question. + (flush_srvr_nfs_cache): pass fserver as argument, so we can + selectively flush the NFS cache for a single fserver (or all of + them, if you pass NULL). + * libamu/xutil.c (switch_to_logfile): truncate a regular-file log file if user passed non-zero "truncate_log" flag. diff --git a/amd/amd.h b/amd/amd.h index 1316d40..19fa916 100644 --- a/amd/amd.h +++ b/amd/amd.h @@ -37,7 +37,7 @@ * SUCH DAMAGE. * * - * $Id: amd.h,v 1.68 2005/07/26 01:48:13 ezk Exp $ + * $Id: amd.h,v 1.69 2005/07/26 03:31:08 ezk Exp $ * */ @@ -140,6 +140,7 @@ #define FSF_PINGING 0x0010 /* Already doing pings */ #define FSF_WEBNFS 0x0020 /* Don't try to contact portmapper */ #define FSF_PING_UNINIT 0x0040 /* ping values have not been initilized */ +#define FSF_FORCE_UNMOUNT 0x0080 /* force umount of this fserver */ #define FSRV_ERROR(fs) ((fs) && (((fs)->fs_flags & FSF_ERROR) == FSF_ERROR)) #define FSRV_ISDOWN(fs) ((fs) && (((fs)->fs_flags & (FSF_DOWN|FSF_VALID)) == (FSF_DOWN|FSF_VALID))) #define FSRV_ISUP(fs) (!(fs) || (((fs)->fs_flags & (FSF_DOWN|FSF_VALID)) == (FSF_VALID))) @@ -534,7 +535,7 @@ extern void amfs_mkcacheref(mntfs *mf); extern int amfs_mount(am_node *mp, mntfs *mf, char *opts); extern void assign_error_mntfs(am_node *mp); extern am_node *next_nonerror_node(am_node *xp); -extern void flush_srvr_nfs_cache(void); +extern void flush_srvr_nfs_cache(fserver *fs); extern void am_mounted(am_node *); extern void mf_mounted(mntfs *mf, bool_t call_free_opts); extern void am_unmounted(am_node *); diff --git a/amd/amq_subr.c b/amd/amq_subr.c index 5855b0b..ef15416 100644 --- a/amd/amq_subr.c +++ b/amd/amq_subr.c @@ -37,7 +37,7 @@ * SUCH DAMAGE. 
* * - * $Id: amq_subr.c,v 1.20 2005/07/26 01:48:13 ezk Exp $ + * $Id: amq_subr.c,v 1.21 2005/07/26 03:31:08 ezk Exp $ * */ /* @@ -154,8 +154,8 @@ amqproc_setopt_1_svc(voidp argp, struct svc_req *rqstp) if (amd_state == Run) { plog(XLOG_INFO, "amq says flush cache"); do_mapc_reload = 0; - flush_nfs_fhandle_cache((fserver *) 0); - flush_srvr_nfs_cache(); + flush_nfs_fhandle_cache((fserver *) NULL); + flush_srvr_nfs_cache((fserver *) NULL); } break; } diff --git a/amd/ops_nfs.c b/amd/ops_nfs.c index a22972a..e81e3af 100644 --- a/amd/ops_nfs.c +++ b/amd/ops_nfs.c @@ -37,7 +37,7 @@ * SUCH DAMAGE. * * - * $Id: ops_nfs.c,v 1.44 2005/07/20 03:32:30 ezk Exp $ + * $Id: ops_nfs.c,v 1.45 2005/07/26 03:31:08 ezk Exp $ * */ @@ -304,7 +304,7 @@ flush_nfs_fhandle_cache(fserver *fs) fh_cache *fp; ITER(fp, fh_cache, &fh_head) { - if (fp->fh_fs == fs || fs == 0) { + if (fp->fh_fs == fs || fs == NULL) { /* * Only invalidate port info for non-WebNFS servers */ @@ -926,8 +926,24 @@ static int nfs_umount(am_node *am, mntfs *mf) { int unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0; + int new_unmount_flags; int error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags); +#if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH)) + /* + * If the attempt to unmount failed with EBUSY, and this fserver was + * marked for forced unmounts, then use forced/lazy unmounts. + */ + if (error == EBUSY && + gopt.flags & CFM_FORCED_UNMOUNTS && + mf->mf_server->fs_flags & FSF_FORCE_UNMOUNT) { + plog(XLOG_INFO, "EZK: nfs_umount: trying forced/lazy unmounts"); + mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT; /* XXX: incorrect */ + new_unmount_flags = unmount_flags | AMU_UMOUNT_FORCE | AMU_UMOUNT_DETACH; + error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, new_unmount_flags); + } +#endif /* HAVE_UMOUNT2 && (MNT2_GEN_OPT_FORCE || MNT2_GEN_OPT_DETACH) */ + /* * Here is some code to unmount 'restarted' file systems. 
* The restarted file systems are marked as 'nfs', not @@ -955,7 +971,7 @@ nfs_umount(am_node *am, mntfs *mf) if (NSTREQ(mf->mf_mount, new_mf->mf_mount, len) && new_mf->mf_mount[len] == '/') { - int new_unmount_flags = + new_unmount_flags = (new_mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0; UMOUNT_FS(new_mf->mf_mount, mnttab_file_name, new_unmount_flags); didsome = 1; diff --git a/amd/srvr_nfs.c b/amd/srvr_nfs.c index 2e8dadf..9b5f95c 100644 --- a/amd/srvr_nfs.c +++ b/amd/srvr_nfs.c @@ -37,7 +37,7 @@ * SUCH DAMAGE. * * - * $Id: srvr_nfs.c,v 1.41 2005/07/11 01:55:28 ezk Exp $ + * $Id: srvr_nfs.c,v 1.42 2005/07/26 03:31:08 ezk Exp $ * */ @@ -114,18 +114,20 @@ static void nfs_keepalive(voidp); /* - * Flush any cached data + * Flush cached data for an fserver (or for all, if fs==NULL) */ void -flush_srvr_nfs_cache(void) +flush_srvr_nfs_cache(fserver *fs) { - fserver *fs = 0; + fserver *fs2 = NULL; - ITER(fs, fserver, &nfs_srvr_list) { - nfs_private *np = (nfs_private *) fs->fs_private; - if (np) { - np->np_mountd_inval = TRUE; - np->np_error = -1; + ITER(fs2, fserver, &nfs_srvr_list) { + if (fs == NULL || fs == fs2) { + nfs_private *np = (nfs_private *) fs2->fs_private; + if (np) { + np->np_mountd_inval = TRUE; + np->np_error = -1; + } } } } @@ -436,6 +438,47 @@ nfs_keepalive_callback(voidp pkt, int len, struct sockaddr_in *sp, struct sockad } +static void +check_fs_addr_change(fserver *fs) +{ + struct hostent *hp = NULL; + struct in_addr ia; + char *old_ipaddr, *new_ipaddr; + // nfs_private *np = (nfs_private *) fs->fs_private; + EZKDBG; + + hp = gethostbyname(fs->fs_host); + if (!hp || + hp->h_addrtype != AF_INET || + !STREQ((char *) hp->h_name, fs->fs_host) || + memcmp((voidp) &fs->fs_ip->sin_addr, + (voidp) hp->h_addr, + sizeof(fs->fs_ip->sin_addr)) == 0) + return; + /* if got here: downed server changed IP address */ + old_ipaddr = strdup(inet_ntoa(fs->fs_ip->sin_addr)); + memmove((voidp) &ia, (voidp) hp->h_addr, sizeof(struct in_addr)); + new_ipaddr = 
inet_ntoa(ia); /* ntoa uses static buf */ + plog(XLOG_WARNING, "EZK: down fileserver %s changed ip: %s -> %s", + fs->fs_host, old_ipaddr, new_ipaddr); + XFREE(old_ipaddr); + /* copy new IP addr */ + memmove((voidp) &fs->fs_ip->sin_addr, + (voidp) hp->h_addr, + sizeof(fs->fs_ip->sin_addr)); + /* XXX: are any of these correct?! */ + fs->fs_flags &= ~FSF_DOWN; + fs->fs_flags |= FSF_VALID | FSF_WANT; + map_flush_srvr(fs); /* XXX: a race with flush_srvr_nfs_cache? */ + flush_srvr_nfs_cache(fs); + fs->fs_flags |= FSF_FORCE_UNMOUNT; + +#if 0 + flush_nfs_fhandle_cache(fs); /* done in caller: nfs_keepalive_timeout */ + // XXX: need to purge nfs_private so that somehow it will get re-initialized +#endif +} + /* * Called when no ping-reply received */ @@ -477,6 +520,7 @@ nfs_keepalive_timeout(voidp v) */ flush_nfs_fhandle_cache(fs); np->np_error = -1; + check_fs_addr_change(fs); /* check if IP addr of fserver changed */ } else { /* * Known to be down @@ -744,35 +788,11 @@ find_nfs_srvr(mntfs *mf) /* * This may not be the best way to do things, but it really doesn't make * sense to query a file server which is marked as 'down' for any - * version/proto combination: so just return that 'downed' server if it - * matched. We also check here if by any chance, the IP address of the - * server was changed; this happens when NFS servers are migrated, or a - * temporary server is made available for one that failed. + * version/proto combination. */ ITER(fs, fserver, &nfs_srvr_list) { - if (!FSRV_ISDOWN(fs) || !STREQ(host, fs->fs_host)) - continue; - if (memcmp((voidp) &fs->fs_ip->sin_addr, - (voidp) &ip->sin_addr, - sizeof(ip->sin_addr)) != 0) { - /* IP address of downed server has changed! 
*/ - char *old_ipaddr = strdup(inet_ntoa(fs->fs_ip->sin_addr)); - char *new_ipaddr = inet_ntoa(ip->sin_addr); /* ntoa uses static buf */ - plog(XLOG_WARNING, "down fileserver %s changed ip: %s -> %s", - host, old_ipaddr, new_ipaddr); - XFREE(old_ipaddr); - /* Now fix the fserver to the new IP */ - dlog("resetting fileserver %s to ip %s (flags: valid, not down)", - host, new_ipaddr); - memmove((voidp) &fs->fs_ip->sin_addr, - (voidp) &ip->sin_addr, - sizeof(ip->sin_addr)); - fs->fs_flags |= FSF_VALID; - fs->fs_flags &= ~(FSF_DOWN|FSF_ERROR); - flush_nfs_fhandle_cache(fs); /* XXX: safer, but really needed? */ - /* fall through to checking available NFS protocols, pinging, etc. */ - } else { - /* server was down and is still down. Not much we can do. */ + if (FSRV_ISDOWN(fs) && + STREQ(host, fs->fs_host)) { plog(XLOG_WARNING, "fileserver %s is already hung - not running NFS proto/version discovery", host); fs->fs_refc++; if (ip)