#!/bin/sh
#
# Support:      users@clusterlabs.org
# License:      GNU General Public License (GPL)
#
# Filesystem
#      Description: Manages a Filesystem on a shared storage medium.
#  Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
#
# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
#
#	OCF parameters are as below:
#		OCF_RESKEY_device
#		OCF_RESKEY_directory
#		OCF_RESKEY_fstype
#		OCF_RESKEY_options
#		OCF_RESKEY_statusfile_prefix
#		OCF_RESKEY_run_fsck
#		OCF_RESKEY_fast_stop
#		OCF_RESKEY_force_clones
#		OCF_RESKEY_force_unmount
#		OCF_RESKEY_term_signals
#		OCF_RESKEY_kill_signals
#		OCF_RESKEY_signal_delay
#	See also the ocf_heartbeat_Filesystem(7) man page.

#
#OCF_RESKEY_device    : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
#                       Or a -U or -L option for mount, or an NFS mount specification
#OCF_RESKEY_directory : the mount point for the filesystem
#OCF_RESKEY_fstype    : optional name of the filesystem type. e.g. ext2
#OCF_RESKEY_options   : options to be given to the mount command via -o
#OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring
#OCF_RESKEY_run_fsck  : fsck execution mode: auto(default)/force/no
#OCF_RESKEY_fast_stop : fast stop: yes/no (default: no; yes on RHEL-based systems older than 9)
#OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts
#                         for each brick in a glusterfs setup
#
#
# This assumes you want to manage a filesystem on a shared (SCSI) bus,
# on a replicated device (such as DRBD), or a network filesystem (such
# as NFS or Samba).
#
# Do not put this filesystem in /etc/fstab.  This script manages all of
# that for you.
#
# NOTE: If 2 or more nodes mount the same file system read-write, and
#       that file system is not designed for that specific purpose
#       (such as GFS or OCFS2), and is not a network file system like
#       NFS or Samba, then the filesystem is going to become
#       corrupted.
#
#	As a result, you should use this together with the stonith
#	option and redundant, independent communications paths.
#
#	If you don't do this, don't blame us when you scramble your
#	disk.

#######################################################################
# Initialization:

# Locate and load the shared OCF shell function library. OCF_FUNCTIONS_DIR
# is only defaulted when it is unset (plain "=" expansion, not ":="), so an
# explicitly empty value supplied by the caller is kept as is.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Defaults
# Default status file prefix; STATUSFILE is built further down as
# "<directory>/<prefix><suffix>", so the trailing slash makes this a
# sub-directory of the mount point.
DFLT_STATUSDIR=".Filesystem_status/"

# Parameter defaults
# One *_default variable per OCF_RESKEY_* parameter. They are printed in
# the meta-data and used below to fill in parameters that are unset.

OCF_RESKEY_device_default=""
OCF_RESKEY_directory_default=""
OCF_RESKEY_fstype_default=""
OCF_RESKEY_options_default=""
OCF_RESKEY_statusfile_prefix_default="${DFLT_STATUSDIR}"
OCF_RESKEY_run_fsck_default="auto"
OCF_RESKEY_fast_stop_default="no"
OCF_RESKEY_force_clones_default="false"
OCF_RESKEY_force_unmount_default="true"
OCF_RESKEY_term_signals_default="TERM"
OCF_RESKEY_kill_signals_default="KILL"
OCF_RESKEY_signal_delay_default="1"

# RHEL specific defaults
# Override two of the defaults above on Red Hat based systems.
# is_redhat_based, get_os_ver and ocf_version_cmp are not defined in this
# file (presumably they come from ocf-shellfuncs, and get_os_ver is
# expected to populate $VER -- TODO confirm).
if is_redhat_based; then
	get_os_ver
	ocf_version_cmp "$VER" "9.0" 2>/dev/null

	# "$?" below is the exit status of ocf_version_cmp; no command may
	# be inserted between that call and this case statement.
	case "$?" in
		# RHEL >= 9
		1|2)
			OCF_RESKEY_force_unmount_default="safe";;
		# RHEL < 9 and fallback if ocf_version_cmp() fails
		*)
			OCF_RESKEY_fast_stop_default="yes";;
	esac
fi


# fast_stop is resolved first because its default depends on the fstype:
# an unset or empty fast_stop becomes "no" for gfs2 and the (possibly
# RHEL-adjusted) default for every other filesystem type. Note that
# OCF_RESKEY_fstype is read here before its own default is applied below.
if [ -z "${OCF_RESKEY_fast_stop}" ]; then
	case "$OCF_RESKEY_fstype" in
		gfs2)
			OCF_RESKEY_fast_stop="no";;
		*)
			OCF_RESKEY_fast_stop=${OCF_RESKEY_fast_stop_default};;
	esac
fi
# Each ":" line below is a no-op command whose only effect is the
# "${var=default}" expansion: the parameter is assigned its default when
# it is unset (an explicitly empty value is left alone). The quoted label
# in front is just an extra argument to ":", which makes the resulting
# value easy to spot in "set -x" traces.
: "fast_stop        ::" ${OCF_RESKEY_fast_stop}
: "device           ::" ${OCF_RESKEY_device=${OCF_RESKEY_device_default}}
: "directory        ::" ${OCF_RESKEY_directory=${OCF_RESKEY_directory_default}}
: "fstype           ::" ${OCF_RESKEY_fstype=${OCF_RESKEY_fstype_default}}
: "options          ::" ${OCF_RESKEY_options=${OCF_RESKEY_options_default}}
: "statusfile_prefix::" ${OCF_RESKEY_statusfile_prefix=${OCF_RESKEY_statusfile_prefix_default}}
: "run_fsck         ::" ${OCF_RESKEY_run_fsck=${OCF_RESKEY_run_fsck_default}}
: "force_clones     ::" ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
: "force_umount     ::" ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}}
: "term_signals     ::" ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}}
: "kill_signals     ::" ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}}
: "signal_delay     ::" ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}}

# Variables used by multiple methods
# Operating system name, used to pick platform specific code paths.
HOSTOS=$(uname)
# A single TAB character: the field separator of the normalized mount list.
TAB=$(printf '\t')

# Location of the status file written by the depth-20 monitor:
#   <directory>/<prefix><instance>[_<clone>]_<nodename>
# An empty or unset prefix falls back to the default status directory.
prefix=${OCF_RESKEY_statusfile_prefix:-$DFLT_STATUSDIR}
suffix="${OCF_RESOURCE_INSTANCE}"
if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then
	# keep the clone instances apart
	suffix="${suffix}_${OCF_RESKEY_CRM_meta_clone}"
fi
suffix="${suffix}_$(uname -n)"
STATUSFILE="${OCF_RESKEY_directory}/${prefix}${suffix}"

#######################################################################

usage() {
	# Print the one-line synopsis of the supported actions on stdout.
	printf '%s\n' "usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
}

# Print the resource agent meta-data (XML) on stdout.
# The here-document delimiter is unquoted, so every ${..._default}
# reference is expanded when the function runs: the advertised defaults
# are the values computed at the top of this script (including the RHEL
# specific overrides), not fixed text.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Filesystem" version="1.1">
<version>1.0</version>

<longdesc lang="en">
Resource script for Filesystem. It manages a Filesystem on a
shared storage medium.

The standard monitor operation of depth 0 (also known as probe)
checks if the filesystem is mounted. If you want deeper tests,
set OCF_CHECK_LEVEL to one of the following values:

10: read first 16 blocks of the device (raw read)

This doesn't exercise the filesystem at all, but the device on
which the filesystem lives. This is noop for non-block devices
such as NFS, SMBFS, or bind mounts.

20: test if a status file can be written and read

The status file must be writable by root. This is not always the
case with an NFS mount, as NFS exports usually have the
"root_squash" option set. In such a setup, you must either use
read-only monitoring (depth=10), export with "no_root_squash" on
your NFS server, or grant world write permissions on the
directory where the status file is to be placed.
</longdesc>
<shortdesc lang="en">Manages filesystem mounts</shortdesc>

<parameters>
<parameter name="device" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.

NOTE: On Linux /dev/disk/by-{uuid,label}/ are preferred to -U/-L.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="${OCF_RESKEY_device_default}" />
</parameter>

<parameter name="directory" required="1">
<longdesc lang="en">
The mount point for the filesystem.
</longdesc>
<shortdesc lang="en">mount point</shortdesc>
<content type="string" default="${OCF_RESKEY_directory_default}" />
</parameter>

<parameter name="fstype" required="1">
<longdesc lang="en">
The type of filesystem to be mounted.
</longdesc>
<shortdesc lang="en">filesystem type</shortdesc>
<content type="string" default="${OCF_RESKEY_fstype_default}" />
</parameter>

<parameter name="options">
<longdesc lang="en">
Any extra options to be given as -o options to mount.

For bind mounts, add "bind" here and set fstype to "none".
We will do the right thing for options such as "bind,ro".
</longdesc>
<shortdesc lang="en">options</shortdesc>
<content type="string" default="${OCF_RESKEY_options_default}" />
</parameter>

<parameter name="statusfile_prefix">
<longdesc lang="en">
The prefix to be used for a status file for resource monitoring
with depth 20. If you don't specify this parameter, all status
files will be created in a separate directory.
</longdesc>
<shortdesc lang="en">status file prefix</shortdesc>
<content type="string" default="${OCF_RESKEY_statusfile_prefix_default}" />
</parameter>

<parameter name="run_fsck">
<longdesc lang="en">
Specify how to decide whether to run fsck or not.

"auto"  : decide to run fsck depending on the fstype(default)
"force" : always run fsck regardless of the fstype
"no"    : do not run fsck ever.
</longdesc>
<shortdesc lang="en">run_fsck</shortdesc>
<content type="string" default="${OCF_RESKEY_run_fsck_default}" />
</parameter>

<parameter name="fast_stop">
<longdesc lang="en">
Normally, we expect no users of the filesystem and the stop
operation to finish quickly. If you cannot control the filesystem
users easily and want to prevent the stop action from failing,
then set this parameter to "no" and add an appropriate timeout
for the stop operation.

This defaults to "no" for GFS2 filesystems.
</longdesc>
<shortdesc lang="en">fast stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_fast_stop_default}" />
</parameter>

<parameter name="force_clones">
<longdesc lang="en">
The use of a clone setup for local filesystems is forbidden
by default. For special setups like glusterfs, cloning a mount
of a local device with a filesystem like ext4 or xfs independently
on several nodes is a valid use case.

Only set this to "true" if you know what you are doing!
</longdesc>
<shortdesc lang="en">allow running as a clone, regardless of filesystem type</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_clones_default}" />
</parameter>

<parameter name="force_unmount">
<longdesc lang="en"><![CDATA[
This option allows specifying how to handle processes that are
currently accessing the mount directory.

"true"  : Kill processes accessing mount point
"safe"  : Kill processes accessing mount point using methods that
          avoid functions that could potentially block during process
          detection
"false" : Do not kill any processes.
"move"  : like "safe", but try to mount --move first

The 'safe' option uses shell logic to walk the /proc/<pid>/ directories
for pids using the mount point while the default option uses the
fuser cli tool. fuser is known to perform operations that can potentially
block if unresponsive nfs mounts are in use on the system.

If new users of the file system are being spawned continuously by unmanaged 3rd
party apps, we likely never win the race and the file system will be kept busy.
Which may result in a timeout and stop failure, potentially escalating to
hard-reset of this node via fencing.

The 'move' option tries to move the mount point somewhere those "rogue apps"
do not expect it, then proceed to kill current users and attempt to umount.

For 'move' to work, you will have to make sure the mount point does not reside
under a shared mount, for example by mount -o bind,private /mount /mount
before mounting /mount/point.
]]></longdesc>
<shortdesc lang="en">Kill processes before unmount</shortdesc>
<content type="string" default="${OCF_RESKEY_force_unmount_default}" />
</parameter>

<parameter name="term_signals">
<longdesc lang="en">
Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
</longdesc>
<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
<content type="string" default="${OCF_RESKEY_term_signals_default}" />
</parameter>

<parameter name="kill_signals">
<longdesc lang="en">
Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
</longdesc>
<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
<content type="string" default="${OCF_RESKEY_kill_signals_default}" />
</parameter>

<parameter name="signal_delay">
<longdesc lang="en">
How many seconds to wait after sending term/kill signals to processes in stop-action.
</longdesc>
<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
<content type="string" default="${OCF_RESKEY_signal_delay_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="60s" />
<action name="stop" timeout="60s" />
<action name="monitor" depth="0" timeout="40s" interval="20s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}

#
#	Make sure the kernel does the right thing with the FS buffers
#	This function should be called after unmounting and before mounting
#	It may not be necessary in 2.4 and later kernels, but it shouldn't hurt
#	anything either...
#
#	It's really a bug that you have to do this at all...
#
flushbufs() {
	# $1: device whose kernel buffers should be flushed.
	# Nothing to do (and success) unless the blockdev tool is available
	# and the resource is known to live on a block device.
	have_binary $BLOCKDEV || return 0
	[ "$blockdevice" = "yes" ] || return 0

	# The exit status of blockdev becomes the status of this function.
	$BLOCKDEV --flushbufs $1
}

# Take advantage of /etc/mtab if present, use portable mount command
# otherwise. Normalize format to "dev mountpoint fstype".
is_bind_mount() {
	# Succeeds when the word "bind" occurs in $options; all output of
	# the check is discarded, only the exit status of grep matters.
	{ echo "$options" | grep -w bind; } >/dev/null 2>&1
}

# Print the current mounts on stdout, one per line, normalized to
#   <device> TAB <mountpoint> TAB <fstype>
# The source is /proc/mounts (unless this resource is a bind mount),
# otherwise a readable /etc/mtab, otherwise the output of $MOUNT.
list_mounts() {
	local inpf=""
	local mount_list=""
	local check_list="x"

	if [ -e "/proc/mounts" ] && ! is_bind_mount; then
		inpf=/proc/mounts
	elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then
		inpf=/etc/mtab
	fi

	# Make sure that the mount list has not been changed while reading.
	# The list is re-read until two consecutive reads are identical;
	# check_list starts as "x" so the body runs at least once.
	while [ "$mount_list" != "$check_list" ]; do
		check_list="$mount_list"
		if [ "$inpf" ]; then
			# <device> <mountpoint> <fstype> ...
			# Spaces in device or mountpoint are octal \040 in $inpf
			# Convert literal spaces (field separators) to tabs
			mount_list=$(cut -d' ' -f1,2,3 < $inpf | tr ' ' "$TAB")
		else
			# <device> on <mountpoint> type <fstype> ...
			# Use tabs as field separators
			# NOTE: match_string and replace_string are not declared
			# local, so they are assigned in the calling shell scope.
			match_string='\(.*\) on \(.*\) type \([^[:space:]]\+\) .*'
			replace_string="\\1${TAB}\\2${TAB}\\3"
			mount_list=$($MOUNT | sed "s/$match_string/$replace_string/g")
		fi
	done

	# Convert octal \040 to space characters
	# The list is deliberately used as the printf format string so that
	# backslash escapes are expanded. No trailing newline is added.
	# NOTE(review): a literal "%" in a mount entry would be interpreted
	# as a format directive here.
	printf "$mount_list"
}

# Work out which real device is mounted on CANONICALIZED_MOUNTPOINT.
# Globals read:    blockdevice, FSTYPE, CANONICALIZED_MOUNTPOINT, TAB
# Globals written: DEVICE (replaced by the device found in the mount
#                  list), blockdevice (set to "yes" when that device is
#                  a block special file), match_string
# Nothing is done when blockdevice is already "yes" or when FSTYPE is
# one of the network/virtual types listed below.
determine_blockdevice() {
	# Quoted on purpose: an unset or empty $blockdevice must simply
	# compare as "not yes" instead of producing a test(1) syntax error.
	if [ "$blockdevice" = "yes" ]; then
		return
	fi

	# Get the current real device name, if possible.
	# (specified devname could be -L or -U...)
	case "$FSTYPE" in
	nfs4|nfs|aznfs|efs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre)
		: ;;
	*)
		# TAB on both sides anchors the match to the whole
		# mountpoint field of the normalized mount list.
		match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}"
		DEVICE=$(list_mounts | grep "$match_string" | cut -d"$TAB" -f1)
		if [ -b "$DEVICE" ]; then
			blockdevice=yes
		fi
		;;
	esac
}

# Lists all filesystems potentially mounted under a given path,
# excluding the path itself.
list_submounts() {
	# $1: path whose sub-mounts are wanted.
	# A mountpoint field that starts with "<path>/" lies below the path;
	# reverse sorting prints the deepest mountpoints first.
	local below="${TAB}${1}/"

	list_mounts |
		grep "$below" |
		cut -d"$TAB" -f2 |
		sort -r
}

# Lists all bind mounts of a given file system,
# excluding the path itself.
list_bindmounts() {
	# $1: mountpoint of the file system whose other mounts are wanted.
	# Writes the globals match_string and mount_disk.

	# skip bind mount
	# we should not umount the original file system via a bind mount
	is_bind_mount && return

	# Nothing mounted on $1: give up with the status of the failed grep.
	match_string="${TAB}${1}${TAB}"
	list_mounts | grep "$match_string" >/dev/null 2>&1 || return
	mount_disk=$(list_mounts | grep "$match_string" | cut -d"$TAB" -f1)

	# Only a block device can show up again under another mountpoint.
	[ -b "$mount_disk" ] || return 0

	# Every other mountpoint of the same device, deepest path first.
	list_mounts | grep -w "$mount_disk" | grep -v "$match_string" | cut -d"$TAB" -f2 | sort -r
}

# kernels < 2.6.26 can't handle bind remounts
bind_kernel_check() {
	# Only read-only bind mounts are affected; without the "ro" option
	# return right away with the status of the failed grep.
	echo "$options" | grep -w ro >/dev/null 2>&1 ||
		return

	# awk exits 1 for a 2.6.x kernel whose third version component
	# (stripped of any non-numeric tail) is below 26.
	if uname -r | awk -F. '
	$1==2 && $2==6 {
		sub("[^0-9].*","",$3);
		if ($3<26)
			exit(1);
	}'
	then
		# Kernel is fine: no warning, non-zero status as before.
		return 1
	fi

	ocf_log warn "kernel $(uname -r) cannot handle read only bind mounts"
}

bind_root_mount_check() {
	# $1: path to look up with df.
	# Returns 1 when the last line of "df -P" reports "/" as the mount
	# point (6th column), 0 otherwise.
	local mounted_on

	mounted_on=$(df -P "$1"  | awk 'END{print $6}')
	[ "$mounted_on" != "/" ]
}

bind_mount() {
	# Second step of a bind mount: when the options are more than a
	# plain "-o bind", remount MOUNTPOINT with "bind" replaced by
	# "remount" so the extra options (e.g. ro) are applied.
	# Writes the global bind_opts.
	if ! is_bind_mount || [ "$options" = "-o bind" ]; then
		# nothing to do; make sure to return OK
		return 0
	fi

	bind_kernel_check
	bind_opts=$(echo "$options" | sed 's/bind/remount/')
	$MOUNT $bind_opts "$MOUNTPOINT"
}

is_option() {
	# $1: option name to look for.
	# Succeeds when $1 occurs as a whole word in OCF_RESKEY_options;
	# all output of the check is discarded.
	{ echo "$OCF_RESKEY_options" | grep -w "$1"; } >/dev/null 2>&1
}

is_fsck_needed() {
	# Decide from OCF_RESKEY_run_fsck (and, in auto mode, FSTYPE)
	# whether fsck should run: status 0 means "run it".

	# Anything but the known modes is reported and then handled as
	# "auto" (the parameter itself is rewritten).
	case $OCF_RESKEY_run_fsck in
		force|no|""|auto)
		;;
		*)
		ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'"
		OCF_RESKEY_run_fsck="auto"
		;;
	esac

	case $OCF_RESKEY_run_fsck in
		force) return 0;;
		no)    return 1;;
	esac

	# auto mode: the types below are never checked, all others are.
	case "$FSTYPE" in
		ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|aznfs|efs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs)
		return 1;;
	esac
	return 0
}

# Check that the kernel supports FSTYPE, loading a module if necessary.
# Returns $OCF_SUCCESS when support is present (or cannot/need not be
# checked), $OCF_ERR_INSTALLED otherwise.
fstype_supported()
{
	local support="$FSTYPE"
	local rc

	# skip checking /proc/filesystems for obsd
	[ "X${HOSTOS}" = "XOpenBSD" ] && return $OCF_SUCCESS

	case "$FSTYPE" in
	""|none)
		: No FSTYPE specified, rely on the system has the right file-system support already
		return $OCF_SUCCESS
		;;
	esac

	# support fuse-filesystems (e.g. GlusterFS) and Amazon Elastic File
	# System (EFS): these are looked up under a different kernel name.
	case "$FSTYPE" in
		fuse.*|glusterfs|rozofs) support="fuse";;
		efs) check_binary "mount.efs"; support="nfs4";;
		aznfs) check_binary "mount.aznfs"; support="nfs4";;
	esac

	[ "$support" = "$FSTYPE" ] ||
		ocf_log info "Checking support for $FSTYPE as \"$support\""

	if grep -w "$support"'$' /proc/filesystems >/dev/null; then
		# found the fs type
		return $OCF_SUCCESS
	fi

	# Not listed: try to load the module, then look again.
	if ! $MODPROBE $support >/dev/null; then
		ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems and failed to load kernel module"
		return $OCF_ERR_INSTALLED
	fi

	# The module may be loaded but not yet completely initialized when
	# /proc/filesystems is read again, so give it a few tries (one
	# second apart) before giving up entirely.
	for try in 1 2 3 4 5; do
		if grep -w "$support"'$' /proc/filesystems >/dev/null; then
			# yes. found the filesystem after doing the modprobe
			return $OCF_SUCCESS
		fi
		ocf_log debug "Unable to find support for $support in /proc/filesystems after modprobe, trying again"
		sleep 1
	done

	ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems"
	return $OCF_ERR_INSTALLED
}


#
# In the case a fresh filesystem is just created from another
# node on the shared storage, and is not visible yet. Then try
# partprobe to refresh /dev/disk/by-{label,uuid}/* up to date.
#
# DEVICE can be /dev/xxx, -U, -L
#
# Make a freshly created device node visible before mounting.
# When DEVICE (a block device path, or a "-U <uuid>" / "-L <label>" mount
# specification) does not resolve to an existing block special file,
# run partprobe and wait with "udevadm settle" until the expected node
# shows up or the operation timeout (OCF_RESKEY_CRM_meta_timeout, in
# milliseconds, defaulted to 60000 here) has passed.
# Returns 0 when nothing had to be refreshed, otherwise the status of
# the "have_binary udevadm && udevadm settle" step.
trigger_udev_rules_if_needed()
{
	local refresh_flag="no"
	local tmp
	local timeout

	# $blockdevice is quoted so that an unset/empty value is simply
	# "not yes" instead of a test(1) syntax error.
	if [ "$blockdevice" = "yes" ]; then
		tmp="$DEVICE"
		if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then
			refresh_flag="yes"
		fi
	else
		# Drop the first word (-U/-L/...) and keep the rest as the
		# uuid or label. DEVICE is passed quoted so that it is not
		# subject to pathname expansion.
		tmp="$(printf '%s\n' "$DEVICE" | awk '{$1=""; print substr($0,2)}')"
		case "$DEVICE" in
		-U*|--uuid*)
			tmp="/dev/disk/by-uuid/$tmp"
			;;
		-L*|--label*)
			tmp="/dev/disk/by-label/$tmp"
			;;
		*)
			# bind mount?
			return ;;
		esac
		[ ! -b "$tmp" ] && refresh_flag="yes"
	fi

	[ "$refresh_flag" = "no" ] && return

	have_binary partprobe && partprobe >/dev/null 2>&1
	timeout=${OCF_RESKEY_CRM_meta_timeout:="60000"}
	timeout=$((timeout/1000))
	# The path is quoted: a label containing whitespace must reach
	# udevadm as one single --exit-if-exists argument.
	have_binary udevadm && udevadm settle -t "$timeout" --exit-if-exists="$tmp"

	return $?
}

#
# START: Start up the filesystem
#
# Mount DEVICE on MOUNTPOINT.
# Returns $OCF_SUCCESS when the file system is mounted afterwards (or was
# mounted already), $OCF_ERR_CONFIGURED when something else is mounted
# below or on the mount point, $OCF_ERR_GENERIC when fsck or mount fail.
# Note that several failure paths "exit" the whole agent with
# $OCF_ERR_INSTALLED instead of returning.
Filesystem_start()
{
	# Check if there are any mounts mounted under the mountpoint
	match_string="${TAB}${CANONICALIZED_MOUNTPOINT}"
	if list_mounts | grep -E "$match_string/\w+" >/dev/null 2>&1; then
		ocf_log err "There is one or more mounts mounted under $MOUNTPOINT."
		return $OCF_ERR_CONFIGURED
	fi

	# With force_unmount=move a previous stop may have left the file
	# system mounted on the "moved" location; refuse to start unless
	# the status of that location is "not running".
	if $move_before_umount && test -d "$MOVED_CANONICALIZED_MOUNTPOINT"; then
		CANONICALIZED_MOUNTPOINT=$MOVED_CANONICALIZED_MOUNTPOINT \
		move_before_umount=false \
		Filesystem_status
		rc=$?
		if [ $rc != $OCF_NOT_RUNNING ]; then
			msg="move_before_umount=$move_before_umount and $MOVED_CANONICALIZED_MOUNTPOINT status is [$rc]"
			rc=$OCF_ERR_GENERIC
			ocf_exit_reason "$msg"
			return $rc
		fi
	fi

	# See if the device is already mounted.
	# Any status other than the two handled below falls through to the
	# actual mount attempt.
	move_before_umount=false Filesystem_status
	case "$?" in
		$OCF_SUCCESS)
			ocf_log info "Filesystem $MOUNTPOINT is already mounted."
			return $OCF_SUCCESS
			;;
		$OCF_ERR_CONFIGURED)
			return $OCF_ERR_CONFIGURED
			;;
	esac

	fstype_supported || exit $OCF_ERR_INSTALLED

	# Check the filesystem & auto repair.
	# NOTE: Some filesystem types don't need this step...  Please modify
	#       accordingly

	# The return value is ignored; a device that is still missing is
	# caught by the check right below.
	trigger_udev_rules_if_needed

	if [ $blockdevice = "yes" ]; then
		if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then
			ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
			exit $OCF_ERR_INSTALLED
		fi

		if is_fsck_needed; then
			ocf_log info  "Starting filesystem check on $DEVICE"
			if [ -z "$FSTYPE" ]; then
				$FSCK -p "$DEVICE"
			else
				$FSCK -t "$FSTYPE" -p "$DEVICE"
			fi

			# NOTE: if any errors at all are detected, it returns non-zero
			# if the error is >= 4 then there is a big problem
			# ($? still holds the fsck status here: nothing ran in between)
			if [ $? -ge 4 ]; then
				ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE"
				return $OCF_ERR_GENERIC
			fi
		fi
	fi

	# Create the mount point if it does not exist yet.
	[ -d "$MOUNTPOINT" ] ||
		ocf_run mkdir -p "$MOUNTPOINT"
	if [ ! -d "$MOUNTPOINT" ] ; then
		ocf_exit_reason "Couldn't find directory  [$MOUNTPOINT] to use as a mount point"
		exit $OCF_ERR_INSTALLED
	fi

	flushbufs "$DEVICE"
	# Mount the filesystem.
	# $options and $device_opt are intentionally unquoted: they may be
	# empty or hold several words. For fstype "none", bind_mount applies
	# a follow-up remount when needed.
	case "$FSTYPE" in
		none) $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" &&
			bind_mount
			;;
		"") $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" ;;
		*) $MOUNT -t "$FSTYPE" $options $device_opt "$DEVICE" "$MOUNTPOINT" ;;
	esac

	# $? is the status of whichever case branch ran above.
	if [ $? -ne 0 ]; then
		ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# end of Filesystem_start

# Print the pids of processes using the file system mounted on $1.
# How they are found depends on FORCE_UNMOUNT:
#   true value : fuser (fstat on OpenBSD)
#   "safe"     : walk /proc with find/grep
#   otherwise  : nothing is printed
# For a bind mount with a true FORCE_UNMOUNT, the global FORCE_UNMOUNT is
# switched to "safe" when bind_root_mount_check on DEVICE fails.
get_pids()
{
	local dir=$1
	local procs
	local mmap_procs

	if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_root_mount_check "$DEVICE"; then
		ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'"
		FORCE_UNMOUNT=safe
	fi

	if ocf_is_true  "$FORCE_UNMOUNT"; then
		if [ "X${HOSTOS}" = "XOpenBSD" ]; then
			fstat | grep "$dir" | awk '{print $3}'
		elif [ "X${HOSTOS}" = "XFreeBSD" ]; then
			$FUSER -c "$dir" 2>/dev/null
		else
			$FUSER -Mm "$dir" 2>/dev/null
		fi
	elif [ "$FORCE_UNMOUNT" = "safe" ]; then
		# Yes, in theory, ${dir} could contain "interesting" characters
		# and would need to be quoted for glob (find) and regex (grep).
		# Don't do that, then.

		# Avoid /proc/[0-9]*, it may cause "Argument list too long".
		# There are several ways to filter for /proc/<pid>
		# -mindepth 1 -not -path "/proc/[0-9]*" -prune -o ...
		# -path "/proc/[!0-9]*" -prune -o ...
		# -path "/proc/[0-9]*" -a ...
		# the latter seemed to be significantly faster for this one in my naive test.

		# root, cwd, exe, maps, fd: all per process, not per task ("thread").
		# -maxdepth to avoid repeatedly scanning the same thing
		# for all threads of a heavily threaded process.
		#
		# Adding -maxdepth reduced scanning from > 16 seconds to < 2 seconds
		# on a mostly idle system that happened to run a few java processes.
		#
		# We can also add a dedicated helper in C to do what is done below,
		# which would reduce the scanning time by an
		# additional factor of 10 again.
		#
		# Or trust that fuser (above) learned something in the last 15 years
		# and avoids blocking operations meanwhile?
		(
		# If you want to debug this, drop this redirection.
		# But it produces too much "No such file" noise for kernel
		# threads or due to races with exiting processes or closing fds.
		exec 2>/dev/null;
		# Symlinks below /proc/<pid>/ that point to ${dir} or into it;
		# the third "/"-separated field of each hit is the pid.
		find /proc -mindepth 1 -maxdepth 3 \
			-path "/proc/[0-9]*" \
			-type l \( -lname "${dir}/*" -o -lname "${dir}" \) -print |
		awk -F/ '{print $3}' | uniq

		# If we have "map_files/", "find" above already found the
		# relevant symlinks, and we don't need to grep "maps" below.
		# Available since kernel 3.3, respectively 4.3.
		test -d /proc/$$/map_files ||
			# memory mappings are also per process, not per task.
			# This finds only /proc/<pid>/maps, and not /proc/<pid>/task/<tid>/maps;
			# if you also want the latter, drop -maxdepth.
			find /proc -mindepth 2 -maxdepth 2 -path "/proc/[0-9]*/maps" -print |
			xargs -r grep -l " ${dir}/" | awk -F/ '{print $3}' | uniq
		) | sort -u
	fi
}

# Send signal $2 to every process using the file system mounted on $1.
# Returns 1 when no such process was found, 0 otherwise.
signal_processes() {
	local dir=$1
	local sig=$2
	local pids pid
	local nr_pids
	local sed_script=""

	# fuser returns a non-zero return code if none of the specified
	# files is accessed or in case of a fatal error, and we don't know
	# whether we get one single line or multiple lines: canonicalize
	# to one pid per line, then count the non-empty lines.
	pids=$(get_pids "$dir" | tr -s ' \t\n' '\n')
	nr_pids=$(echo "$pids" | grep -c "^.")

	if [ $nr_pids = 0 ]; then
		ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
		return 1
	fi

	# For many pids, reporting and killing them individually just takes
	# too long (and may even be too many words for the shell), so they
	# are signalled in bulk and only the first and last ten are logged.
	#
	# If the list of pids is too long for a single shell variable,
	# fix your fork bomb workload, and get a better shell anyways.
	if [ $nr_pids -gt 24 ]; then
		echo "$pids" | xargs -r kill -s $sig
		sed_script="11 s/^.*/... and more .../; 12,$(( $nr_pids - 10))d"
		pids=$(echo "$pids" | sed -e "$sed_script" | tr '\n' ' ')
		ocf_log info "sent signals $sig to ${nr_pids} processes [${pids}]"
		return 0
	fi

	# Few pids: log the ps line of each one before signalling it.
	for pid in $pids; do
		ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)"
		kill -s $sig $pid
	done
	return 0
}
# Run one umount attempt on $2, with $1 as optional extra umount
# argument(s). Success is judged by the mount list afterwards, not by
# the exit status of umount.
try_umount() {
	local force_arg="$1" SUB="$2"

	$UMOUNT $force_arg "$SUB"

	# Still listed as a mountpoint: this attempt failed.
	if list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1; then
		return $OCF_ERR_GENERIC
	fi

	ocf_log info "unmounted $SUB successfully"
	return $OCF_SUCCESS
}
# Wait for background job $1, killing it with SIGKILL if it is still
# running after $2 seconds. Returns the wait status of the job.
timeout_child() {
	local pid="$1" timeout="$2" killer ret

	# Watchdog in the background: sleeps, then KILLs the given process.
	{ sleep $timeout && kill -s KILL $pid; } &
	killer=$!

	# Blocks until the job exits on its own or is killed by the watchdog.
	wait $pid
	ret=$?

	# A wait status above 128 means the job was terminated by a signal
	# (see "man wait"); anything lower means it finished by itself, so
	# the watchdog is no longer needed.
	if [ $ret -lt 128 ]; then
		kill -s KILL $killer
	fi
	return $ret
}
# Repeat "signal the users of $2, then try to unmount it" until the
# unmount succeeds. There is no upper bound inside this function: the
# caller is expected to run it in the background and to kill it when
# its time budget is used up.
#   $1  extra argument(s) for umount (may be empty)
#   $2  mountpoint to unmount
#   $3  whitespace separated list of signals to send in every round
# Returns $OCF_SUCCESS once the file system is unmounted.
fs_stop_loop() {
	local force_arg="$1" SUB="$2" signals="$3" sig sent_signal
	while true; do
		sent_signal=false
		for sig in $signals; do
			signal_processes "$SUB" $sig && sent_signal=true
		done
		if $sent_signal; then
			# Try to umount immediately after signalling, to reduce
			# the time window in which new users of the file system
			# may be spawned.
			try_umount "$force_arg" "$SUB" && return $OCF_SUCCESS
		fi
		# Always pause between rounds, also when nothing was
		# signalled: otherwise a file system that stays busy without
		# any visible user process would make this loop spin through
		# process scans, umount attempts and log messages at full
		# speed until it is killed.
		sleep "$OCF_RESKEY_signal_delay"
		# Try to umount after the signal_delay, maybe some processes
		# needed a moment to "exit cleanly" after receiving SIGTERM.
		try_umount "$force_arg" "$SUB" && return $OCF_SUCCESS
	done
}

# Unmount $1 within $2 seconds.
# A plain umount is tried first; if that fails, the users of the file
# system are signalled, first with the term signals, then with the kill
# signals, each phase limited to half of the timeout.
fs_stop() {
	local SUB="$1" timeout=$2 grace_time ret
	grace_time=$((timeout/2))

	# Just walking /proc may take "a long time", even if we don't find any users of this FS.
	# If dependencies are properly configured, umount should just work.
	# Only if that fails, try to find and kill processes that still use it.
	if try_umount "" "$SUB"; then
		return $OCF_SUCCESS
	fi

	# Phase 1: graceful termination, for up to half of the timeout.
	fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" &
	timeout_child $! $grace_time
	ret=$?
	if [ $ret -eq $OCF_SUCCESS ]; then
		return $ret
	fi

	# Phase 2: kill signals (and the forced umount argument, if any)
	# for the rest of the timeout.
	fs_stop_loop "$umount_force" "$SUB" "$OCF_RESKEY_kill_signals" &
	timeout_child $! $grace_time
	ret=$?
	if [ $ret -eq $OCF_SUCCESS ]; then
		return $ret
	fi

	# timeout expired
	ocf_exit_reason "Couldn't unmount $SUB within given timeout"
	return $OCF_ERR_GENERIC
}

# Move the mount from CANONICALIZED_MOUNTPOINT to
# MOVED_CANONICALIZED_MOUNTPOINT, creating the target directory first if
# it is missing. Returns 0 on success, otherwise the status of the
# failing mkdir or mount.
try_mount_move()
{
	if ! test -d "$MOVED_CANONICALIZED_MOUNTPOINT"; then
		mkdir "$MOVED_CANONICALIZED_MOUNTPOINT" || return
	fi

	mount --move "$CANONICALIZED_MOUNTPOINT" "$MOVED_CANONICALIZED_MOUNTPOINT" || return

	ocf_log info "Moved $MOUNTPOINT to $MOVED_CANONICALIZED_MOUNTPOINT"
	return 0
	# To test really bad timing of "action timeout":
	# test -e /tmp/fail-after-move && rm -f /tmp/fail-after-move && kill -KILL $$
}

#
# STOP: Unmount the filesystem
#
# Unmount the file system, including everything mounted below it and
# other mounts of the same block device.
# Returns the global rc: $OCF_SUCCESS when nothing was mounted, otherwise
# the fs_stop result of the last entry handled by the loop below (which
# is the mount point itself).
Filesystem_stop()
{
	# See if the device is currently mounted
	move_before_umount=false Filesystem_status >/dev/null 2>&1
	if [ $? -eq $OCF_NOT_RUNNING ]; then
		# Already unmounted, wonderful.
		# But did we also unmount the moved fs?
		if $move_before_umount; then
			# Run the whole stop once more against the "moved"
			# location, with moving disabled to end the recursion.
			CANONICALIZED_MOUNTPOINT=$MOVED_CANONICALIZED_MOUNTPOINT \
			move_before_umount=false \
			Filesystem_stop
			return $?
		else
			rc=$OCF_SUCCESS
		fi
	else
		# Wipe the status file, but continue with a warning if
		# removal fails -- the file system might be read only
		if [ $OCF_CHECK_LEVEL -eq 20 ]; then
			rm -f "${STATUSFILE}"
			if [ $? -ne 0 ]; then
				ocf_log warn "Failed to remove status file ${STATUSFILE}."
			fi
		fi

		# Determine the real blockdevice this is mounted on (if
		# possible) prior to unmounting.
		determine_blockdevice

		# For networked filesystems, there's merit in trying -f:
		# (umount_force is a global that fs_stop passes on for its
		# kill phase)
		case "$FSTYPE" in
		nfs4|nfs|aznfs|efs|cifs|smbfs) umount_force="-f" ;;
		esac

		# If the mount could be moved out of the way, continue the
		# stop on the new location.
		if $move_before_umount && try_mount_move ; then
			CANONICALIZED_MOUNTPOINT=$MOVED_CANONICALIZED_MOUNTPOINT \
			move_before_umount=false \
			Filesystem_stop
			return $?
		fi

		# Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
		# The here-document feeds the loop in the current shell (so
		# rc survives it): sub-mounts first, then bind mounts of the
		# device, and the mount point itself as the last line.
		local timeout
		while read SUB; do
			ocf_log info "Trying to unmount $SUB"
			if ocf_is_true "$FAST_STOP"; then
				timeout=6
			else
				# operation timeout in milliseconds,
				# defaulted to 20000 when unset or empty
				timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"}
				timeout=$((timeout/1000))
			fi
			fs_stop "$SUB" $timeout
			rc=$?
			# A failure is reported but does not end the loop;
			# the remaining entries are still tried.
			if [ $rc -ne $OCF_SUCCESS ]; then
				ocf_exit_reason "Couldn't unmount $SUB, giving up!"
			fi
		done <<-EOF
			$(list_submounts "$CANONICALIZED_MOUNTPOINT"; \
				list_bindmounts "$CANONICALIZED_MOUNTPOINT"; \
				echo $CANONICALIZED_MOUNTPOINT)
			EOF
	fi

	flushbufs "$DEVICE"

	return $rc
}
# end of Filesystem_stop

#
# STATUS: is the filesystem mounted or not?
#
# Report whether something is mounted on CANONICALIZED_MOUNTPOINT.
# Returns (and leaves in the global rc):
#   $OCF_SUCCESS         mounted; for block devices, by the expected DEVICE
#   $OCF_NOT_RUNNING     not mounted, or (probe/stop only) another device
#                        is mounted there
#   $OCF_ERR_CONFIGURED  another device is mounted there
#   $OCF_ERR_GENERIC     move_before_umount is set and something is still
#                        mounted on MOVED_CANONICALIZED_MOUNTPOINT
# Exits with $OCF_ERR_GENERIC when a device path cannot be canonicalized.
# For OP=status the result is also logged.
Filesystem_status()
{
	# Declared separately from their assignments: "local v=$(cmd)"
	# would report the status of "local" and hide a failing command.
	local match_string mounted_device
	local canonicalized_device canonicalized_mounted_device

	if $move_before_umount && test -d "$MOVED_CANONICALIZED_MOUNTPOINT"; then
		# Have to recurse once.
		CANONICALIZED_MOUNTPOINT=$MOVED_CANONICALIZED_MOUNTPOINT \
		move_before_umount=false \
		OP= \
		Filesystem_status
		rc=$?
		if [ $rc = $OCF_SUCCESS ]; then
			rc=$OCF_ERR_GENERIC
			msg="move_before_umount=$move_before_umount and something is mounted on $MOVED_CANONICALIZED_MOUNTPOINT"
			ocf_exit_reason "$msg"
			return $rc
		fi
	fi

	match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}"
	mounted_device=$(list_mounts | grep "$match_string" | awk '{print $1}')

	if [ -n "$mounted_device" ]; then
		# Mounted. Set the result right away, so that no value left
		# in rc by an earlier call (for example the recursion above)
		# can be returned; the device comparison below may still
		# override it.
		rc=$OCF_SUCCESS
		msg="$MOUNTPOINT is mounted (running)"

		if [ "X$blockdevice" = "Xyes" ]; then
			# Compare canonical paths, so that symlinks such as
			# /dev/disk/by-* match the name in the mount list.
			if [ -e "$DEVICE" ] ; then
				canonicalized_device=$(readlink -f "$DEVICE")
				if [ $? -ne 0 ]; then
					ocf_exit_reason "Could not canonicalize $DEVICE because readlink failed"
					exit $OCF_ERR_GENERIC
				fi
			else
				canonicalized_device="$DEVICE"
			fi
			if [ -e "$mounted_device" ] ; then
				canonicalized_mounted_device=$(readlink -f "$mounted_device")
				if [ $? -ne 0 ]; then
					ocf_exit_reason "Could not canonicalize $mounted_device because readlink failed"
					exit $OCF_ERR_GENERIC
				fi
			else
				canonicalized_mounted_device="$mounted_device"
			fi
			if [ "$canonicalized_device" != "$canonicalized_mounted_device" ]; then
				msg="Another device ($mounted_device) is already mounted on $MOUNTPOINT"
				if ocf_is_probe || [ "$__OCF_ACTION" = "stop" ]; then
					# For probe and stop this just means
					# "our file system is not mounted".
					ocf_log debug "$msg"
					rc=$OCF_NOT_RUNNING
				else
					ocf_exit_reason "$msg"
					rc=$OCF_ERR_CONFIGURED
				fi
			fi
		fi
	else
		rc=$OCF_NOT_RUNNING
		msg="$MOUNTPOINT is unmounted (stopped)"
	fi

	# Only the "status" operation reports the result in the log.
	case "$OP" in
		status)	ocf_log info "$msg";;
	esac

	return $rc
}
# end of Filesystem_status


# Note: the read/write tests below will stall in case the
# underlying block device (or in the case of a NAS mount, the
# NAS server) has gone away. In that case, if I/O does not
# return to normal in time, the operation hits its timeout
# and it is up to the CRM to initiate appropriate recovery
# actions (such as fencing the node).
#
# MONITOR 10: read the device
#
# Depth-10 monitor: raw read from DEVICE, bypassing the page cache.
# A no-op (success, with a warning) when blockdevice is "no".
# Writes the globals dd_opts and err_output.
# NOTE(review): this reads a single 4k block, while the meta-data text
# speaks of "first 16 blocks" -- confirm which of the two is intended.
Filesystem_monitor_10()
{
	case "$blockdevice" in
	no)
		ocf_log warn "$DEVICE is not a block device, monitor 10 is noop"
		return $OCF_SUCCESS
		;;
	esac

	dd_opts="iflag=direct bs=4k count=1"
	# Keep what dd writes to stderr for the log, discard the data read.
	if ! err_output=$(dd if="$DEVICE" $dd_opts 2>&1 >/dev/null); then
		ocf_exit_reason "Failed to read device $DEVICE"
		ocf_log err "dd said: $err_output"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
#
# MONITOR 20: write and read a status file
#
Filesystem_monitor_20()
{
	# Level 20 check: write $OCF_RESOURCE_INSTANCE into $STATUSFILE with
	# dd, then make sure the file exists and can be read back.
	# Returns $OCF_SUCCESS if all steps worked, $OCF_ERR_GENERIC as soon
	# as one of them fails.
	case "$blockdevice" in
	no)
		# O_DIRECT not supported on cifs/smbfs
		dd_opts="oflag=sync bs=4k conv=fsync,sync"
		;;
	*)
		# Writing to the device in O_DIRECT mode is imperative
		# to bypass caches.
		dd_opts="oflag=direct,sync bs=4k conv=fsync,sync"
		;;
	esac

	# Create the directory holding the status file if it is missing
	status_dir=$(dirname "$STATUSFILE")
	if [ ! -d "$status_dir" ]; then
		mkdir -p "$status_dir"
	fi

	# write ... ($dd_opts is unquoted on purpose: several dd operands)
	if ! err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of="${STATUSFILE}" $dd_opts 2>&1); then
		ocf_exit_reason "Failed to write status file ${STATUSFILE}"
		ocf_log err "dd said: $err_output"
		return $OCF_ERR_GENERIC
	fi

	# ... stat ...
	if ! test -f "${STATUSFILE}"; then
		ocf_exit_reason "Cannot stat the status file ${STATUSFILE}"
		return $OCF_ERR_GENERIC
	fi

	# ... and read
	if ! cat "${STATUSFILE}" > /dev/null; then
		ocf_exit_reason "Cannot read the status file ${STATUSFILE}"
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

Filesystem_monitor()
{
	# Monitor entry point.
	#  1. Filesystem_status is run first; any result other than
	#     $OCF_SUCCESS is returned unchanged.
	#  2. $OCF_CHECK_LEVEL then selects an additional check:
	#       unset, empty or <= 0   no additional check
	#       10                     Filesystem_monitor_10
	#       20                     Filesystem_monitor_20
	#       anything else          $OCF_ERR_CONFIGURED
	# Returns the result of the last check that was run.
	local rc level

	Filesystem_status
	rc=$?
	if [ "$rc" -ne "$OCF_SUCCESS" ]; then
		return $rc
	fi

	level=${OCF_CHECK_LEVEL:-0}
	# For a non-numeric level the numeric test itself fails (its
	# complaint is silenced), so such a value falls through to the
	# case below and is rejected as unsupported instead of being
	# mistaken for "no additional check".
	if [ "$level" -le 0 ] 2>/dev/null; then
		return $rc
	fi

	case "$level" in
	10) Filesystem_monitor_10; rc=$?;;
	20) Filesystem_monitor_20; rc=$?;;
	*)
		ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
		rc=$OCF_ERR_CONFIGURED
	;;
	esac
	return $rc
}
# end of Filesystem_monitor


#
#	VALIDATE_ALL: Are the instance parameters valid?
#	FIXME!!  The only part that's useful is the return code.
#	This code always returns $OCF_SUCCESS (!)
#	FIXME!! Needs some tuning to match fstype_supported() (e.g., for
#	fuse). Can we just call fstype_supported() with a flag like
#	"no_modprobe" instead?
#
Filesystem_validate_all()
{
	# The only thing verified here is that dd can be found (through
	# check_binary) when $OCF_CHECK_LEVEL is above 0. The function
	# itself then returns $OCF_SUCCESS.

	# If we are supposed to do monitoring with status files, then
	# we need a utility to write in O_DIRECT mode.
	# An unset or empty OCF_CHECK_LEVEL counts as 0; expanded unquoted
	# it would leave the test with a malformed expression.
	if [ "${OCF_CHECK_LEVEL:-0}" -gt 0 ]; then
		check_binary dd
		# Note: really old coreutils version do not support
		# the "oflag" option for dd. We don't check for that
		# here. In case dd does not support oflag, monitor is
		# bound to fail, with dd spewing an error message to
		# the logs. On such systems, we must do without status
		# file monitoring.
	fi

	#TODO: How to check the $options ?
	return $OCF_SUCCESS
}

#
# set the blockdevice variable to "no" or "yes"
#
set_blockdevice_var() {
	# Reads:  FSTYPE, DEVICE, OP and, through is_option, the mount options.
	# Writes: blockdevice ("yes" or "no"). When DEVICE is a
	#         -U/-L/--uuid/--label specification, device_opt receives
	#         the option and DEVICE is cut down to what follows it;
	#         blockdevice stays "no" in that case.
	blockdevice=no

	# these are definitely not block devices
	case "$FSTYPE" in
	nfs4|nfs|aznfs|efs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;;
	esac

	# Call the helper directly: wrapping it in $( ) would try to run
	# whatever it prints as a command.
	if is_option "loop"; then
		return
	fi

	# DEVICE is quoted below so that blanks and glob characters inside
	# a label survive untouched.
	case "$DEVICE" in
	--uuid=*|--uuid\ *|--label=*|--label\ *)
		# everything up to the first blank or "=" is the option
		device_opt=$(printf '%s\n' "$DEVICE" | sed "s/\([[:blank:]]\|=\).*//")
		DEVICE=$(printf '%s\n' "$DEVICE" | sed -E "s/$device_opt([[:blank:]]*|=)//")
		;;
	-U*|-L*)  # short versions of --uuid/--label
		device_opt=$(printf '%s\n' "$DEVICE" | cut -c1-2)
		DEVICE=$(printf '%s\n' "$DEVICE" | sed "s/$device_opt[[:blank:]]*//")
		;;
	/dev/null) # Special case for BSC
		blockdevice=yes
		;;
	*)
		# Warn about a missing device, except on start and on probes
		if [ ! -b "$DEVICE" ] && [ ! -d "$DEVICE" ] && [ "X$OP" != Xstart ] && ! ocf_is_probe ; then
			ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
		fi
		# anything that is not a directory is taken for a block device
		if [ ! -d "$DEVICE" ]; then
			blockdevice=yes
		fi
		;;
	esac
}

# Exactly one argument, the operation to carry out, is accepted
[ $# -eq 1 ] || {
	usage
	exit $OCF_ERR_ARGS
}

# Pick up the OCF_RESKEY_ environment variables...

# force_unmount counts as "yes" when it is not configured
FORCE_UNMOUNT="${OCF_RESKEY_force_unmount:-yes}"

DEVICE="$OCF_RESKEY_device"
# Whitespace at either end of the device is a configuration error,
# except on stop, which reports success instead.
case "$DEVICE" in
	[[:space:]]*|*[[:space:]])
		if [ "$__OCF_ACTION" = "stop" ]; then
			exit $OCF_SUCCESS
		fi
		ocf_exit_reason "device parameter does not accept leading or trailing whitespace characters"
		exit $OCF_ERR_CONFIGURED
		;;
esac

FSTYPE=$OCF_RESKEY_fstype

# $options stays untouched when no mount options are configured
if [ -n "$OCF_RESKEY_options" ]; then
	options="-o $OCF_RESKEY_options"
fi

# fast_stop defaults to "yes"; the default is stored in the parameter too
: "${OCF_RESKEY_fast_stop:=yes}"
FAST_STOP=$OCF_RESKEY_fast_stop

# force_unmount=move is turned into "safe" plus the move_before_umount flag
if [ "$FORCE_UNMOUNT" = "move" ]; then
	move_before_umount=true
	FORCE_UNMOUNT=safe
else
	move_before_umount=false
fi

OP=$1

# meta-data and usage do not depend on any instance parameter, so
# they are answered right away.
if [ "$OP" = "meta-data" ]; then
	meta_data
	exit $OCF_SUCCESS
elif [ "$OP" = "usage" ]; then
	usage
	exit $OCF_SUCCESS
fi

# Every operation handled from here on needs a device
[ -n "$DEVICE" ] || {
	ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed"
	exit $OCF_ERR_CONFIGURED
}

# Sets $blockdevice to "yes" or "no"
set_blockdevice_var

# Normalize instance parameters:

# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
# But the output of `mount` and /proc/mounts do not.
if [ -z "$OCF_RESKEY_directory" ]; then
	if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then
		ocf_exit_reason "Please specify the directory"
		exit $OCF_ERR_CONFIGURED
	fi
else
	MOUNTPOINT="$(echo "$OCF_RESKEY_directory" | sed 's/\/*$//')"
	: ${MOUNTPOINT:=/}
	case "$MOUNTPOINT" in
		[[:space:]]*|*[[:space:]])
			[ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
			ocf_exit_reason "directory parameter does not accept leading or trailing whitespace characters"
			exit $OCF_ERR_CONFIGURED
			;;
	esac
	if [ -e "$MOUNTPOINT" ] ; then
		CANONICALIZED_MOUNTPOINT="$(readlink -f "$MOUNTPOINT")"
		if [ $? -ne 0 ]; then
			ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
			exit $OCF_ERR_GENERIC
		fi
	else
		CANONICALIZED_MOUNTPOINT="$MOUNTPOINT"
	fi

	if echo "$CANONICALIZED_MOUNTPOINT" | grep -q "^\s*/\s*$"; then
		if ocf_is_probe; then
			ocf_log debug "/ cannot be managed in a cluster"
			exit $OCF_NOT_RUNNING
		elif [ "$__OCF_ACTION" = "start" ] || [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "status" ]; then
			ocf_exit_reason "/ cannot be managed in a cluster"
			exit $OCF_ERR_CONFIGURED
		elif [ "$__OCF_ACTION" = "stop" ]; then
			exit $OCF_SUCCESS
		fi
	fi
fi

MOVED_CANONICALIZED_MOUNTPOINT=$(echo "$CANONICALIZED_MOUNTPOINT" | sed -e 's,/\([^/]\+\)$,/.\1,')


# Make sure the utilities are found
# (modprobe and fuser are not looked for on OpenBSD)
case "X${HOSTOS}" in
XOpenBSD)
	;;
*)
	check_binary $MODPROBE
	check_binary $FUSER
	;;
esac
check_binary $FSCK
check_binary $MOUNT
check_binary $UMOUNT

# monitor and validate-all stay quiet, everything else is announced
case "$OP" in
monitor|validate-all)
	;;
*)
	ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"
	;;
esac

# status, monitor, validate-all and stop are served right here; the
# script then ends with whatever their handler returned.
case "$OP" in
	status)       op_handler=Filesystem_status ;;
	monitor)      op_handler=Filesystem_monitor ;;
	validate-all) op_handler=Filesystem_validate_all ;;
	stop)         op_handler=Filesystem_stop ;;
	*)            op_handler="" ;;
esac
if [ -n "$op_handler" ]; then
	"$op_handler"
	exit $?
fi

# CLUSTERSAFE rates what happens if the resource runs as a clone:
#   0  refuse to proceed
#   1  nothing to object to
#   2  proceed, but log warnings
if is_option "ro"; then
	CLUSTERSAFE=2
else
	CLUSTERSAFE=0
fi

case "$FSTYPE" in
nfs4|nfs|aznfs|efs|smbfs|cifs|gfs2|glusterfs|ceph|ocfs2|cvfs|lustre)
	CLUSTERSAFE=1 # this is kind of safe too
	systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target"
	;;
none|overlay|overlayfs|tmpfs)
	CLUSTERSAFE=1 # this is kind of safe too
	;;
# add here CLUSTERSAFE=0 for all filesystems which are not
# cluster aware and which, even if when mounted read-only,
# could still modify parts of it such as journal/metadata
ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
	if ! ocf_is_true "$OCF_RESKEY_force_clones"; then
		CLUSTERSAFE=0 # these are not allowed
	else
		CLUSTERSAFE=2
		systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target"
	fi
	;;
esac

# The rating only matters when we actually run as a clone
if ocf_is_clone; then
	if [ "$CLUSTERSAFE" = 0 ]; then
		ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
		ocf_log err "DO NOT RUN IT AS A CLONE!"
		ocf_log err "Politely refusing to proceed to avoid data corruption."
		exit $OCF_ERR_CONFIGURED
	elif [ "$CLUSTERSAFE" = 2 ]; then
		ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!"
		if ocf_is_true "$OCF_RESKEY_force_clones"; then
			ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so."
		else
			ocf_log warn "But we'll let it run because it is mounted read-only."
			ocf_log warn "Please make sure that it's meta data is read-only too!"
		fi
	fi
fi

# start is run and its result becomes the exit code of the script;
# any other operation reaching this point is not implemented.
if [ "$OP" = "start" ]; then
	Filesystem_start
	exit $?
fi

usage
exit $OCF_ERR_UNIMPLEMENTED