#!/bin/bash
#
# lc_cluman - script for generating the Red Hat Cluster Manager
#	      HA software's configuration files
#
################################################################################

# usage
#
# Print the command-line synopsis and the description of every option to
# stderr, then terminate the script with exit status 1.
# This function never returns to its caller.
usage() {
	cat >&2 <<EOF

Usage:  $(basename "$0") <-n hostnames> [-s service addresses]
		      [-c heartbeat channel] [-o heartbeat options] [-v]
		      <-d target device> [-d target device...]

	-n hostnames            the nodenames of the primary node and its fail-
				overs
				Multiple nodenames are separated by colon (:)
				delimiter. The first one is the nodename of the
				primary node, the others are failover nodenames.
	-s service addresses    the IP addresses to failover
				Multiple addresses are separated by colon (:)
				delimiter.
	-c heartbeat channel	the method to send/rcv heartbeats on
				The default method is multicast, and multicast_
				ipaddress is "225.0.0.11".
	-o heartbeat options    a "catchall" for other heartbeat configuration
				options
				Multiple options are separated by colon (:)
				delimiter.
	-v			verbose mode
	-d target device        the target device name and mount point
				The device name and mount point are separated by
				colon (:) delimiter.

EOF
	exit 1
}

# Get the library of functions
#
# NOTE(review): names used throughout this script but not defined in it
# (e.g. verbose_output, ${REMOTE}, ${CONFIG_CMD}, ${CLUMAN_DIR}) are presumably
# provided by this library - confirm. Without them the later steps would run
# with empty command variables, so stop right away if it cannot be read.
if [ ! -r /usr/lib/lustre/lc_common ]; then
	echo >&2 "$(basename "$0"): Cannot read /usr/lib/lustre/lc_common!"
	exit 1
fi
. /usr/lib/lustre/lc_common

#****************************** Global variables ******************************#
TMP_DIR=${CLUMGR_TMP_DIR}		# Temporary directory

declare -a NODE_NAMES			# Node names in the failover group
declare -a SRV_IPADDRS			# Service IP addresses

# Lustre target device names, service names and mount points
# (entries with the same index belong to the same target)
declare -a TARGET_DEVNAMES TARGET_SRVNAMES TARGET_MNTPNTS
declare -i TARGET_NUM=0			# Number of targets

# Get and check the positional parameters
#
#   -n  sets HOSTNAME_OPT, PRIM_NODENAME (text before the first colon) and
#       HOSTNAME_NUM (number of non-empty node names)
#   -s  sets SRVADDR_OPT
#   -c  sets HBCHANNEL_OPT; must be empty or mention broadcast/multicast
#   -o  sets HBOPT_OPT
#   -v  sets VERBOSE_OUTPUT to true
#   -d  appends one "device:mountpoint" pair to TARGET_DEVNAMES and
#       TARGET_MNTPNTS and increments TARGET_NUM; may be repeated
#
# Every invalid value and every unknown option ends in usage(), which exits.
VERBOSE_OUTPUT=false
while getopts "n:s:c:o:vd:" OPTION; do
	case "${OPTION}" in
	n)
		HOSTNAME_OPT=${OPTARG}
		PRIM_NODENAME=${HOSTNAME_OPT%%:*}
		if [ -z "${PRIM_NODENAME}" ]; then
			echo >&2 $"$(basename "$0"): Missing primary nodename!"
			usage
		fi
		# Count only the non-empty names: an argument such as "node1:"
		# has two colon-separated fields but names no failover node.
		HOSTNAME_NUM=$(echo "${HOSTNAME_OPT}" | awk -F":" \
			'{ n = 0; for (i = 1; i <= NF; i++) if ($i != "") n++; print n }')
		if [ "${HOSTNAME_NUM}" -lt 2 ]; then
			echo >&2 $"$(basename "$0"): Missing failover nodenames!"
			usage
		fi
		;;
	s)
		SRVADDR_OPT=${OPTARG}
		;;
	c)
		# Drop one leading and one trailing double quote, if present
		HBCHANNEL_OPT=${OPTARG#\"}
		HBCHANNEL_OPT=${HBCHANNEL_OPT%\"}
		case "${HBCHANNEL_OPT}" in
		"" | *broadcast* | *multicast*)
			;;
		*)
			echo >&2 $"$(basename "$0"): Invalid Heartbeat channel" \
				  "- ${HBCHANNEL_OPT}!"
			usage
			;;
		esac
		;;
	o)
		# Drop one leading and one trailing double quote, if present
		HBOPT_OPT=${OPTARG#\"}
		HBOPT_OPT=${HBOPT_OPT%\"}
		;;
	v)
		VERBOSE_OUTPUT=true
		;;
	d)
		DEVICE_OPT=${OPTARG}
		TARGET_DEVNAMES[TARGET_NUM]=$(echo "${DEVICE_OPT}" \
					      | awk -F: '{print $1}')
		TARGET_MNTPNTS[TARGET_NUM]=$(echo "${DEVICE_OPT}" \
					     | awk -F: '{print $2}')
		if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
			echo >&2 $"$(basename "$0"): Missing target device name!"
			usage
		fi
		if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
			echo >&2 $"$(basename "$0"): Missing mount point for target"\
				  "${TARGET_DEVNAMES[TARGET_NUM]}!"
			usage
		fi
		TARGET_NUM=$(( TARGET_NUM + 1 ))
		;;
	*)
		# getopts reports unknown options and missing arguments as "?"
		usage
		;;
	esac
done

# Check the required parameters
if [ -z "${HOSTNAME_OPT}" ]; then
	echo >&2 $"$(basename "$0"): Missing -n option!"
	usage
fi

if [ -z "${DEVICE_OPT}" ]; then
	echo >&2 $"$(basename "$0"): Missing -d option!"
	usage
fi

# get_nodenames
#
# Get all the node names in this failover group
#
# Splits the colon-separated ${HOSTNAME_OPT} into the global NODE_NAMES array.
# The names keep the order in which they were given, so NODE_NAMES[0] is the
# first (primary) name. Empty fields are skipped and any previous content of
# NODE_NAMES is discarded. Always returns 0.
get_nodenames() {
	local -a fields
	local nodename

	# Split in the shell: an awk "for (i in a)" loop visits the elements
	# in an unspecified order and could therefore shuffle the names.
	IFS=':' read -r -a fields <<< "${HOSTNAME_OPT}"

	NODE_NAMES=()
	for nodename in "${fields[@]}"; do
		[ -n "${nodename}" ] && NODE_NAMES[${#NODE_NAMES[@]}]=${nodename}
	done

	return 0
}

# get_check_srvIPaddrs
#
# Get and check all the service IP addresses in this failover group
#
# Splits the colon-separated ${SRVADDR_OPT} into the global SRV_IPADDRS array
# (input order kept, empty fields skipped, previous content discarded) and
# then runs ${SCRIPT_VERIFY_SRVIP} for every (service address, node name)
# pair, taking the node names from NODE_NAMES.
#
# Returns 0 if every verification succeeds, 1 as soon as one fails.
get_check_srvIPaddrs() {
	local -a fields
	local srvIPaddr nodename

	# Split in the shell: an awk "for (i in a)" loop visits the elements
	# in an unspecified order.
	IFS=':' read -r -a fields <<< "${SRVADDR_OPT}"

	SRV_IPADDRS=()
	for srvIPaddr in "${fields[@]}"; do
		[ -n "${srvIPaddr}" ] && \
		SRV_IPADDRS[${#SRV_IPADDRS[@]}]=${srvIPaddr}
	done

	for srvIPaddr in "${SRV_IPADDRS[@]}"; do
		for nodename in "${NODE_NAMES[@]}"; do
			# Check service IP address
			verbose_output "Verifying service IP ${srvIPaddr} and" \
				       "real IP of host ${nodename} are in the" \
				       "same subnet..."
			# The command variable stays unquoted in case it holds
			# a command together with its options.
			if ! ${SCRIPT_VERIFY_SRVIP} "${srvIPaddr}" "${nodename}"
			then
				return 1
			fi
			verbose_output "OK"
		done
	done

	return 0
}

# cluman_running host_name
#
# Query the clumanager service status on @host_name by running
# "/sbin/service clumanager status" there through ${REMOTE}.
#
# Returns:
#   0 - the remote status command succeeded
#   1 - the remote status command failed
#   2 - the remote status command failed and its output contains
#       "unrecognized"; the output is reported on stderr
cluman_running() {
	local node=$1
	local status_out

	# ${REMOTE} stays unquoted in case it holds a command plus options
	if status_out=$(${REMOTE} "${node}" \
			"/sbin/service clumanager status" 2>&1); then
		return 0
	fi

	case "${status_out}" in
	*unrecognized*)
		echo >&2 "$(basename "$0"): cluman_running() error:"\
		"remote command to ${node} error: ${status_out}!"
		return 2
		;;
	esac

	return 1
}

# stop_cluman host_name
#
# Stop the clumanager service on @host_name by running
# "/sbin/service clumanager stop" there through ${REMOTE}.
#
# Returns 0 and prints a confirmation on stdout when the remote command
# succeeds; otherwise reports the captured output on stderr and returns 1.
stop_cluman() {
	local node=$1
	local stop_out

	# ${REMOTE} stays unquoted in case it holds a command plus options
	if ! stop_out=$(${REMOTE} "${node}" \
			"/sbin/service clumanager stop" 2>&1); then
		echo >&2 "$(basename "$0"): stop_cluman() error:"\
		"remote command to ${node} error: ${stop_out}!"
		return 1
	fi

	echo "$(basename "$0"): Clumanager service is stopped on node ${node}."
	return 0
}

# check_cluman
#
# Run remote command to check each node's clumanager service
#
# If ${SRVADDR_OPT} is set, the service IP addresses are collected and
# verified first. Then every node in NODE_NAMES is queried with
# cluman_running. Where the service is running, the user is asked on stdin
# whether it may be stopped; any answer starting with "n" or "N" declines,
# every other answer (including an empty one) is taken as consent and the
# service is stopped with stop_cluman.
#
# Returns:
#   0 - no clumanager service is left running on any node
#   1 - address verification, the status query or stopping a service failed
#   2 - the user declined to stop a running service
check_cluman() {
	local -i idx
	local rc
	local answer

	# Get and check all the service IP addresses
	if [ -n "${SRVADDR_OPT}" ] && ! get_check_srvIPaddrs; then
		return 1
	fi

	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		# Check clumanager service status
		cluman_running "${NODE_NAMES[idx]}"
		rc=$?
		case "${rc}" in
		2)
			return 1
			;;
		1)
			verbose_output "Clumanager service is stopped on"\
			"node ${NODE_NAMES[idx]}."
			;;
		0)
			answer=
			echo -n "$(basename "$0"): Clumanager service is running on"\
			"${NODE_NAMES[idx]}, go ahead to stop the service and"\
			"generate new configurations? [y/n]:"
			read -r answer
			case "${answer}" in
			[nN]*)
				echo "$(basename "$0"): New Clumanager configurations"\
				"are not generated."
				return 2
				;;
			esac

			# Stop clumanager service; do not go on generating
			# configurations for a service that is still running
			if ! stop_cluman "${NODE_NAMES[idx]}"; then
				return 1
			fi
			;;
		esac
	done

	return 0
}

# get_srvname hostname target_devname
#
# Look up the Lustre target server name of @target_devname on node @hostname.
#
# Runs "${TUNEFS} --print --verbose" on the device through ${REMOTE}, keeps
# the "Target:" lines and takes the first word following "Target: ".
#
# On success the server name is printed on stdout and 0 is returned.
# On failure an error message is printed on stdout (not stderr) and 1 is
# returned.
get_srvname() {
	local node=$1
	local devname=$2
	local remote_out
	local -a words
	local srvname=

	# Execute remote command to get the target server name
	# (${REMOTE} stays unquoted in case it holds a command plus options)
	if ! remote_out=$(${REMOTE} "${node}" \
		"${TUNEFS} --print --verbose ${devname} | grep Target:" 2>&1)
	then
		echo "$(basename "$0"): get_srvname() error:" \
		     "from host ${node} - ${remote_out}"
		return 1
	fi

	case "${remote_out}" in
	*"Target: "*)
		# Word-split whatever follows the first "Target: " and keep
		# the first word
		words=( ${remote_out#*Target: } )
		srvname=${words[0]}
		;;
	esac

	if [ -z "${srvname}" ]; then
		echo "$(basename "$0"): get_srvname() error: Cannot get the"\
		     "server name of target ${devname} in ${node}!"
		return 1
	fi

	echo "${srvname}"
	return 0
}

# get_srvnames
#
# Fill TARGET_SRVNAMES with the server name of every device listed in
# TARGET_DEVNAMES, asking the primary node ${PRIM_NODENAME} via get_srvname.
# The array is emptied first; entry N belongs to TARGET_DEVNAMES[N].
#
# Returns 0 when every lookup succeeds. On the first failed lookup the text
# printed by get_srvname is forwarded to stderr and 1 is returned.
get_srvnames() {
	local -i n

	# Start from an empty TARGET_SRVNAMES array
	unset TARGET_SRVNAMES

	for ((n = 0; n < ${#TARGET_DEVNAMES[@]}; n++)); do
		# get_srvname reports its errors on stdout, so on failure the
		# captured text is the error message
		if ! TARGET_SRVNAMES[n]=$(get_srvname "${PRIM_NODENAME}" \
					  "${TARGET_DEVNAMES[n]}"); then
			echo >&2 "${TARGET_SRVNAMES[n]}"
			return 1
		fi
	done

	return 0
}

# check_retval retval
#
# Evaluate @retval, the exit status of a ${CONFIG_CMD} invocation.
# Returns 0 when @retval is zero; otherwise reports the failure on stderr
# and returns 1.
check_retval() {
	[ "$1" -ne 0 ] || return 0

	echo >&2 "$(basename "$0"): Failed to run ${CONFIG_CMD}!"
	return 1
}

# add_services
#
# Add service tags into the cluster.xml file
#
# For every entry of TARGET_SRVNAMES, ${CONFIG_CMD} is invoked to
#   1. add the service,
#   2. add each address in SRV_IPADDRS to that service,
#   3. add the matching device from TARGET_DEVNAMES,
#   4. add the matching mount point from TARGET_MNTPNTS with fstype "lustre".
#
# Returns 0 when all invocations succeed, 1 right after the first one that
# check_retval rejects.
add_services() {
	local -i t
	local -i a
	local srvname

	for ((t = 0; t < ${#TARGET_SRVNAMES[@]}; t++)); do
		srvname=${TARGET_SRVNAMES[t]}

		# Service tag
		${CONFIG_CMD} --add_service --name="${srvname}"
		check_retval $? || return 1

		# Service IP addresses
		for ((a = 0; a < ${#SRV_IPADDRS[@]}; a++)); do
			${CONFIG_CMD} --service="${srvname}" \
			--add_service_ipaddress --ipaddress="${SRV_IPADDRS[a]}"
			check_retval $? || return 1
		done

		# Device
		${CONFIG_CMD} --service="${srvname}" \
			      --add_device \
			      --name="${TARGET_DEVNAMES[t]}"
		check_retval $? || return 1

		# Mount information of the device
		${CONFIG_CMD} --service="${srvname}" \
			      --device="${TARGET_DEVNAMES[t]}" \
			      --mount \
			      --mountpoint="${TARGET_MNTPNTS[t]}" \
			      --fstype=lustre
		check_retval $? || return 1
	done

	return 0
}

# gen_cluster_xml
#
# Run redhat-config-cluster-cmd to create the cluster.xml file
#
# An existing ${CLUMAN_DIR}/cluster.xml is first moved aside to
# cluster.xml.old. Then ${CONFIG_CMD} is invoked to add, in this order:
#   - the clumembd heartbeat settings derived from ${HBCHANNEL_OPT}
#     (broadcast, or multicast with the address given as its second word;
#     nothing is configured when no channel or no address was given),
#   - the cluster tag, named after all the nodes in NODE_NAMES,
#   - one member tag per node,
#   - the service tags (through add_services),
#   - every colon-separated entry of ${HBOPT_OPT}, in the order given.
#
# Returns 0 on success, 1 as soon as any step fails.
gen_cluster_xml() {
	local -i idx
	local mcast_IPaddr
	local node_names
	local hbopt
	local -a hbopts

	[ -e "${CLUMAN_DIR}/cluster.xml" ] && \
	/bin/mv "${CLUMAN_DIR}/cluster.xml" "${CLUMAN_DIR}/cluster.xml.old"

	# Run redhat-config-cluster-cmd to generate cluster.xml
	# Add clumembd tag
	case "${HBCHANNEL_OPT}" in
	*broadcast*)
		# Both settings matter, so check each command on its own
		${CONFIG_CMD} --clumembd --broadcast=yes
		check_retval $? || return 1
		${CONFIG_CMD} --clumembd --multicast=no
		check_retval $? || return 1
		;;
	*multicast*)
		mcast_IPaddr=$(echo "${HBCHANNEL_OPT}" | awk '{print $2}')
		if [ -n "${mcast_IPaddr}" ]; then
			${CONFIG_CMD} --clumembd --multicast=yes \
				      --multicast_ipaddress="${mcast_IPaddr}"
			check_retval $? || return 1
		fi
		;;
	esac

	# Add cluster tag
	node_names=
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		node_names=${node_names}"${NODE_NAMES[idx]} "
	done

	${CONFIG_CMD} --cluster --name="${node_names}failover group"
	check_retval $? || return 1

	# Add member tag
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		${CONFIG_CMD} --add_member --name="${NODE_NAMES[idx]}"
		check_retval $? || return 1
	done

	# Add service tag
	if ! add_services; then
		return 1
	fi

	# Add other tags
	if [ -n "${HBOPT_OPT}" ]; then
		# Split in the shell: an awk "for (i in a)" loop visits the
		# elements in an unspecified order.
		IFS=':' read -r -a hbopts <<< "${HBOPT_OPT}"
		for hbopt in "${hbopts[@]}"; do
			# An empty entry would run the command without options
			[ -n "${hbopt}" ] || continue

			# ${hbopt} is unquoted on purpose: one entry may hold
			# several command-line words
			${CONFIG_CMD} ${hbopt}
			check_retval $? || return 1
		done
	fi

	return 0
}

# restore_cluster_xml
#
# Helper of create_config: put ${CLUMAN_DIR}/cluster.xml.old, if there is one,
# back in place as ${CLUMAN_DIR}/cluster.xml. Always returns 0.
restore_cluster_xml() {
	[ -e "${CLUMAN_DIR}/cluster.xml.old" ] && \
	/bin/mv -f "${CLUMAN_DIR}/cluster.xml.old" "${CLUMAN_DIR}/cluster.xml"
	return 0
}

# create_config
#
# Create the cluster.xml file and scp it to the each node's ${CLUMAN_DIR}/
#
# The file is built in ${CLUMAN_DIR}/cluster.xml, moved to
# ${TMP_DIR}/cluster.xml${FILE_SUFFIX} (CONFIG_LUSTRE) and any cluster.xml
# that was in ${CLUMAN_DIR} before is put back. Three cases:
#   - ${TMP_DIR}/cluster.xml.${PRIM_NODENAME} (CONFIG_PRIMNODE) is non-empty
#     and already mentions the first target's server name: nothing is done;
#   - CONFIG_PRIMNODE is non-empty but lacks that name: it is copied and the
#     services are added to it with add_services;
#   - otherwise a new file is generated with gen_cluster_xml.
# The result is then copied to ${TMP_DIR}/cluster.xml.<node> and sent with
# scp to ${CLUMAN_DIR}/ on every node in NODE_NAMES.
#
# Sets the globals CONFIG_PRIMNODE and CONFIG_LUSTRE.
# Returns 0 on success and 1 on any failure; on failure a cluster.xml that
# had been moved aside is restored.
create_config() {
	local -i idx
	local cluster_xml=${CLUMAN_DIR}/cluster.xml

	if ! /bin/mkdir -p "${TMP_DIR}"; then
		echo >&2 "$(basename "$0"): Failed to create directory"\
			 "${TMP_DIR}!"
		return 1
	fi
	CONFIG_PRIMNODE=${TMP_DIR}/cluster.xml.${PRIM_NODENAME}
	CONFIG_LUSTRE=${TMP_DIR}/cluster.xml${FILE_SUFFIX}

	# Get server names of Lustre targets
	if ! get_srvnames; then
		return 1
	fi

	if [ -s "${CONFIG_PRIMNODE}" ]; then
		if /bin/grep -q -- "${TARGET_SRVNAMES[0]}" "${CONFIG_PRIMNODE}"
		then
			verbose_output "${CONFIG_PRIMNODE} already exists."
			return 0
		fi

		[ -e "${cluster_xml}" ] && \
		/bin/mv "${cluster_xml}" "${cluster_xml}.old"

		/bin/cp -f "${CONFIG_PRIMNODE}" "${cluster_xml}"

		# Add services into the cluster.xml file
		if ! add_services; then
			restore_cluster_xml
			return 1
		fi
	else
		# Run redhat-config-cluster-cmd to generate cluster.xml
		verbose_output "Creating cluster.xml file for" \
			       "${PRIM_NODENAME} failover group hosts..."
		if ! gen_cluster_xml; then
			# gen_cluster_xml moves an existing file aside itself
			restore_cluster_xml
			return 1
		fi
		verbose_output "OK"
	fi

	# Without this check a leftover ${CONFIG_LUSTRE} from an earlier run
	# could be distributed instead of the file just built
	if ! /bin/mv "${cluster_xml}" "${CONFIG_LUSTRE}"; then
		echo >&2 "$(basename "$0"): Failed to move cluster.xml file"\
			 "to ${CONFIG_LUSTRE}!"
		restore_cluster_xml
		return 1
	fi
	restore_cluster_xml

	# scp the cluster.xml file to all the nodes
	verbose_output "Remote copying cluster.xml${FILE_SUFFIX} file to" \
		       "${PRIM_NODENAME} failover group hosts..."
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		/bin/cp -f "${CONFIG_LUSTRE}" \
			   "${TMP_DIR}/cluster.xml.${NODE_NAMES[idx]}"

		scp "${CONFIG_LUSTRE}" "${NODE_NAMES[idx]}:${CLUMAN_DIR}/"
		if [ $? -ne 0 ]; then
			echo >&2 "$(basename "$0"): Failed to scp cluster.xml file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done
	verbose_output "OK"

	return 0
}

# Main flow
#
# 1. collect the node names of the failover group,
# 2. check (and, with the user's consent, stop) the clumanager services,
# 3. generate and distribute the configuration files.
# The script exits 0 on success or when the user declines to stop a running
# service, and 1 on any failure.

# Get all the node names
get_nodenames || exit 1

# Check clumanager services
verbose_output "Checking clumanager service in the ${PRIM_NODENAME}"\
	       "failover group hosts..."
check_cluman
rc=$?
case "${rc}" in
2)
	# The user chose not to generate new configurations
	verbose_output "OK"
	exit 0
	;;
1)
	exit 1
	;;
esac
verbose_output "OK"

# Generate configuration files
create_config || exit 1

exit 0
