#!/bin/bash

# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:

#
# lc_hb - script for generating the Heartbeat HA software's
#         configuration files
#
###############################################################################

# usage
#
# Print the command-line synopsis to stderr and terminate the script
# with exit status 1. Never returns to the caller.
usage() {
	# "$0" is quoted so that a script path containing whitespace still
	# yields the correct program name.
	cat >&2 <<EOF

Usage:	$(basename "$0")	<-r HBver> <-n hostnames> [-v]
			<-d target device> [-d target device...]

	-r HBver		the version of Heartbeat software
                        	The Heartbeat software versions which are curr-
				ently supported are: hbv1 (Heartbeat version 1) 
				and hbv2 (Heartbeat version 2).
	-n hostnames            the nodenames of the primary node and its fail-
				overs
                        	Multiple nodenames are separated by colon (:)
                        	delimiter. The first one is the nodename of the 
				primary node, the others are failover nodenames.
	-v			verbose mode
	-d target device        the target device name and mount point
                        	The device name and mount point are separated by
				colon (:) delimiter. 

EOF
	exit 1
}

# Pull in the shared helper library
source /usr/lib/lustre/lc_common

#****************************** Global variables ******************************#
# Heartbeat tools: every default below may be overridden from the environment
: "${HB_TOOLS_PATH:=/usr/lib64/heartbeat}"	# Heartbeat tools path
CIB_GEN_SCRIPT="${HB_TOOLS_PATH}/haresources2cib.py"
: "${CL_STATUS:=/usr/bin/cl_status}"

# Service directories and names
: "${HARES_DIR:=${HA_DIR}/resource.d}"	# Heartbeat resources
: "${LUSTRE_SRV:=Filesystem}"		# Service script provided by Heartbeat

# Temporary directory and the template files kept in it
TMP_DIR="${HB_TMP_DIR}"
HACF_TEMP="${TMP_DIR}/ha.cf.temp"
AUTHKEYS_TEMP="${TMP_DIR}/authkeys${FILE_SUFFIX}"

# Node names in the failover group, followed by the Lustre target
# device names, service names and mount points
declare -a NODE_NAMES
declare -a TARGET_DEVNAMES
declare -a TARGET_SRVNAMES
declare -a TARGET_MNTPNTS

# Number of targets
declare -i TARGET_NUM
TARGET_NUM=0


# Get and check the positional parameters
#
# Options:
#   -r HBver       Heartbeat version, must equal HBVER_HBV1 or HBVER_HBV2
#   -n hostnames   colon-separated node names, primary node first
#   -v             verbose mode
#   -d dev:mntpnt  target device and mount point, may be repeated
#
# Every validation failure reports through error_output and then calls
# usage, which terminates the script.
VERBOSE_OUTPUT=false
while getopts "r:n:vd:" OPTION; do
	case "${OPTION}" in
	r)
		HBVER_OPT=$OPTARG
		if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
		&& [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
			error_output "Invalid Heartbeat software" \
				  "version - ${HBVER_OPT}!"
			usage
		fi
		;;
	n)
		HOSTNAME_OPT=$OPTARG
		PRIM_NODENAME=$(echo ${HOSTNAME_OPT} | awk -F":" '{print $1}')
		if [ -z "${PRIM_NODENAME}" ]; then
			error_output "Missing primary nodename!"
			usage
		fi
		HOSTNAME_NUM=$(echo ${HOSTNAME_OPT} | awk -F":" '{print NF}')
		if [ "${HOSTNAME_NUM}" -lt 2 ]; then
			error_output "Missing failover nodenames!"
			usage
		fi
		;;
	v)
		VERBOSE_OUTPUT=true
		;;
	d)
		DEVICE_OPT=$OPTARG
		TARGET_DEVNAMES[TARGET_NUM]=$(echo ${DEVICE_OPT}|awk -F: '{print $1}')
		TARGET_MNTPNTS[TARGET_NUM]=$(echo ${DEVICE_OPT}|awk -F: '{print $2}')
		if [ -z "${TARGET_DEVNAMES[TARGET_NUM]}" ]; then
			error_output "Missing target device name!"
			usage
		fi
		if [ -z "${TARGET_MNTPNTS[TARGET_NUM]}" ]; then
			error_output "Missing mount point for target"\
				  "${TARGET_DEVNAMES[TARGET_NUM]}!"
			usage
		fi
		TARGET_NUM=$(( TARGET_NUM + 1 ))
		;;
	*)
		# getopts reports unknown options and missing arguments as "?"
		usage
		;;
	esac
done

# Check the required parameters
if [ -z "${HBVER_OPT}" ]; then
	error_output "Missing -r option!"
	usage
fi

if [ -z "${HOSTNAME_OPT}" ]; then
	error_output "Missing -n option!"
	usage
fi

if [ -z "${DEVICE_OPT}" ]; then
	error_output "Missing -d option!"
	usage
fi

# This check needs both -r and -n, so it is made only after all options
# have been parsed; doing it inside the -n branch would skip it whenever
# -n appears before -r on the command line.
if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ] && [ "${HOSTNAME_NUM}" -gt 2 ]; then
	error_output "Heartbeat version 1 can" \
		  "only support 2 nodes!"
	usage
fi

# get_nodenames
#
# Get all the node names in this failover group
#
# Splits the colon-separated HOSTNAME_OPT into the global NODE_NAMES
# array, preserving the order given on the command line (primary node
# first). Empty fields are skipped. NODE_NAMES is rebuilt from scratch,
# so entries left over from an earlier call never survive.
#
# Globals: HOSTNAME_OPT (read), NODE_NAMES (written)
# Returns: always 0
get_nodenames() {
	local -a fields
	local nodename

	NODE_NAMES=()

	# Split in the shell itself: awk's "for (i in array)" visits the
	# elements in an unspecified order, which cannot be relied on here.
	IFS=':' read -r -a fields <<< "${HOSTNAME_OPT}"

	for nodename in "${fields[@]}"; do
		[ -n "${nodename}" ] || continue
		NODE_NAMES[${#NODE_NAMES[@]}]=${nodename}
	done

	return 0
}

# check_remote_file host_name file
#
# Ask @host_name, through the ${REMOTE} command, whether @file exists.
#
# Returns 0 when the remote test succeeds; returns 1 (after reporting
# through error_output) when an argument is missing or the remote
# test fails.
check_remote_file() {
	local node=$1
	local path=$2

	# Both arguments are mandatory
	[ -n "${node}" ] || {
		error_output "check_remote_file():"\
			 "Missing hostname!"
		return 1
	}

	[ -n "${path}" ] || {
		error_output "check_remote_file():"\
			 "Missing file name!"
		return 1
	}

	# Run the existence test on the remote side
	if ${REMOTE} ${node} "[ -e ${path} ]"; then
		return 0
	fi

	error_output "check_remote_file():"\
	"${path} does not exist in host ${node}!"
	return 1
}

# hb_running host_name
#
# Query the heartbeat status on @host_name by running
# "${CL_STATUS} hbstatus" through ${REMOTE}.
#
# Returns:
#   0  the remote status command succeeded
#   1  it failed and its output contains the word "stop"
#   2  it failed for any other reason (reported via error_output)
hb_running() {
	local node=$1
	local status_out

	if status_out=$(${REMOTE} ${node} "${CL_STATUS} hbstatus" 2>&1); then
		return 0
	fi

	# A failure whose output mentions "stop" is reported as status 1
	# rather than as an error
	case "${status_out}" in
	*stop*)
		return 1
		;;
	esac

	error_output "hb_running():"\
	"remote command to ${node} error: ${status_out}!"
	return 2
}

# stop_heartbeat host_name
#
# Stop the heartbeat service on @host_name by running
# "service heartbeat stop" there through ${REMOTE}.
#
# Returns 0 and prints a confirmation on stdout on success; returns 1
# after reporting the remote output via error_output on failure.
stop_heartbeat() {
	local node=$1
	local output
	# /sbin and /usr/sbin are appended to the remote PATH before
	# "service" is invoked; stdin is taken from /dev/null.
	local remote_cmd="PATH=\$PATH:/sbin:/usr/sbin
service heartbeat stop < /dev/null"

	if ! output=$(${REMOTE} ${node} "${remote_cmd}" 2>&1); then
		error_output "stop_heartbeat():"\
		"remote command to ${node} error: ${output}!"
		return 1
	fi

	echo "$(basename $0): Heartbeat service is stopped on node ${node}."
	return 0
}

# check_heartbeat
#
# Run remote commands to check each node's heartbeat installation and
# service state.
#
# For every node in NODE_NAMES this verifies that ${HA_DIR} exists, plus
# ${MON_DIR} for Heartbeat v1 or ${CIB_DIR} for Heartbeat v2. When the
# heartbeat service is found running, the operator is asked on stdin
# whether it may be stopped; any answer other than "n" stops it.
#
# Globals: NODE_NAMES, HA_DIR, MON_DIR, CIB_DIR, HBVER_OPT,
#          HBVER_HBV1, HBVER_HBV2 (all read)
# Returns:
#   0  all nodes checked, heartbeat is stopped everywhere
#   1  a check failed, or the heartbeat service could not be stopped
#   2  the operator declined to stop a running heartbeat service
check_heartbeat() {
	declare -i idx
	local OK
	local rc
	local node

	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		node=${NODE_NAMES[idx]}

		# Check Heartbeat configuration directory
		if ! check_remote_file "${node}" "${HA_DIR}"; then
			error_output "check_heartbeat():"\
			"Is Heartbeat package installed?"
			return 1
		fi

		if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
			# Check mon configuration directory
			if ! check_remote_file "${node}" "${MON_DIR}"; then
				error_output "check_heartbeat():"\
				"Is mon package installed?"
				return 1
			fi
		fi

		if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
			# Check crm directory
			if ! check_remote_file "${node}" "${CIB_DIR}"; then
				error_output "check_heartbeat():"\
				"Is Heartbeat v2 package installed?"
				return 1
			fi
		fi

		# Check heartbeat service status
		hb_running "${node}"
		rc=$?
		case "${rc}" in
		2)
			# hb_running has already reported the error
			return 1
			;;
		1)
			verbose_output "Heartbeat service is stopped on"\
			"node ${node}."
			;;
		0)
			OK=
			echo -n "$(basename "$0"): Heartbeat service is running on"\
			"${node}, go ahead to stop the service and"\
			"generate new configurations? [y/n]:"
			read -r OK
			if [ "${OK}" = "n" ]; then
				echo "$(basename "$0"): New Heartbeat configurations"\
				"are not generated."
				return 2
			fi

			# Stop heartbeat service; new configurations must not
			# be pushed to a node whose service is still running.
			if ! stop_heartbeat "${node}"; then
				return 1
			fi
			;;
		esac
	done

	return 0
}

# get_srvname hostname target_devname
#
# Look up the Lustre target server name of @target_devname on the node
# @hostname by running "${TUNEFS} --print --verbose" there through
# ${REMOTE} and taking the first word after "Target: " in its output.
#
# On success the server name is printed on stdout and 0 is returned.
# On failure an error message is printed on stdout instead and 1 is
# returned.
get_srvname() {
	local node=$1
	local device=$2
	local srvname=
	local output
	local remote_cmd="PATH=\$PATH:/sbin:/usr/sbin
${TUNEFS} --print --verbose ${device} | grep Target:"

	# Execute remote command to get the target server name
	if ! output=$(${REMOTE} ${node} "${remote_cmd}" 2>&1); then
		echo "$(basename $0): get_srvname() error:" \
		     "from host ${node} - ${output}"
		return 1
	fi

	# Keep the first word that follows the "Target: " label
	case "${output}" in
	*"Target: "*)
		srvname=$(echo ${output#*Target: } | awk '{print $1}')
		;;
	esac

	if [ -z "${srvname}" ]; then
		echo "$(basename $0): get_srvname() error: Cannot get the"\
		     "server name of target ${device} in ${node}!"
		return 1
	fi

	echo ${srvname}
	return 0
} 

# get_srvnames
#
# Fill the global TARGET_SRVNAMES array with the server name of every
# device listed in TARGET_DEVNAMES, as reported by get_srvname for the
# primary node.
#
# Returns 0 on success. On the first lookup failure the text produced
# by get_srvname is passed to error_output and 1 is returned.
get_srvnames() {
	local -i n=0
	local -i total=${#TARGET_DEVNAMES[@]}

	# Start from an empty TARGET_SRVNAMES array
	unset TARGET_SRVNAMES

	while (( n < total )); do
		# On failure the element holds get_srvname's error message
		if ! TARGET_SRVNAMES[n]=$(get_srvname ${PRIM_NODENAME} \
					  ${TARGET_DEVNAMES[n]}); then
			error_output "${TARGET_SRVNAMES[n]}"
			return 1
		fi
		n=$(( n + 1 ))
	done

	return 0
}

# create_template
#
# Create the templates for ha.cf and authkeys files
#
# ${TMP_DIR} is created if necessary. ${HACF_TEMP} is (re)written with
# the settings for the Heartbeat version selected in HBVER_OPT; when
# HBVER_OPT matches neither known version no ha.cf template is written.
# ${AUTHKEYS_TEMP} is written only when it is missing or empty, so an
# existing key file is left untouched.
#
# Globals: TMP_DIR, HACF_TEMP, AUTHKEYS_TEMP, HBVER_OPT, HBVER_HBV1,
#          HBVER_HBV2 (all read)
# Returns: 0 on success, 1 if the directory or a template file cannot
#          be created
create_template() {
	local write_ok=true

	if ! /bin/mkdir -p "${TMP_DIR}"; then
		error_output "create_template():"\
			 "Failed to create directory ${TMP_DIR}!"
		return 1
	fi

	# Create the template for ha.cf
	if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
		cat >"${HACF_TEMP}" <<EOF || write_ok=false
debugfile /var/log/ha-debug
logfile /var/log/ha-log
logfacility     local0
keepalive 2
deadtime 30
initdead 120

auto_failback off

EOF
	elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
		cat >"${HACF_TEMP}" <<EOF || write_ok=false
use_logd        yes
keepalive 1
deadtime 10
initdead 60

crm yes

EOF
	fi

	if [ "${write_ok}" != "true" ]; then
		error_output "create_template():"\
			 "Failed to write ${HACF_TEMP}!"
		return 1
	fi

	# Create the template for authkeys
	# NOTE(review): the shared secret below is a fixed, well-known
	# default; sites should provide their own ${AUTHKEYS_TEMP}.
	if [ ! -s "${AUTHKEYS_TEMP}" ]; then
		cat >"${AUTHKEYS_TEMP}" <<EOF || write_ok=false
auth 1
1 sha1 HelloLustre!
EOF
	fi

	if [ "${write_ok}" != "true" ]; then
		error_output "create_template():"\
			 "Failed to write ${AUTHKEYS_TEMP}!"
		return 1
	fi

	return 0
}

# create_hacf
#
# Create the ha.cf file and scp it to each node's ${HA_DIR}/
#
# The file is built from ${HACF_TEMP} plus one "node" line per entry of
# NODE_NAMES. An empty marker file ${TMP_DIR}/ha.cf.<node> records that
# a node has received its ha.cf; if the primary node's marker already
# exists nothing is done.
#
# The markers are written only after every copy has succeeded.
# Creating them before scp would make a rerun after a failed copy
# report "already exists" and never distribute the file.
#
# Globals: TMP_DIR, PRIM_NODENAME, FILE_SUFFIX, HACF_TEMP, NODE_NAMES,
#          HA_DIR (read); HACF_PRIMNODE, HACF_LUSTRE (written)
# Returns: 0 on success or when already done, 1 on any failure
create_hacf() {
	HACF_PRIMNODE="${TMP_DIR}/ha.cf.${PRIM_NODENAME}"
	HACF_LUSTRE="${TMP_DIR}/ha.cf${FILE_SUFFIX}"

	declare -i idx

	if [ -e "${HACF_PRIMNODE}" ]; then
		# The ha.cf file for the primary node has already existed.
		verbose_output "${HACF_PRIMNODE} already exists."
		return 0
	fi

	if ! /bin/cp -f "${HACF_TEMP}" "${HACF_LUSTRE}"; then
		error_output "Failed to copy ${HACF_TEMP}"\
			 "to ${HACF_LUSTRE}!"
		return 1
	fi

	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		echo "node    ${NODE_NAMES[idx]}" >> "${HACF_LUSTRE}"
	done

	# scp ha.cf file to all the nodes
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		scp "${HACF_LUSTRE}" "${NODE_NAMES[idx]}:${HA_DIR}/"
		if [ $? -ne 0 ]; then
			error_output "Failed to scp ha.cf file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done

	# Every node has the file now: record that for later runs
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		touch "${TMP_DIR}/ha.cf.${NODE_NAMES[idx]}"
	done

	return 0
}

# create_haresources
#
# Build the haresources file for this failover group and scp it to
# ${HA_DIR}/ on every node. For Heartbeat v2 a cib.xml is additionally
# generated from it with ${CIB_GEN_SCRIPT} and copied to ${CIB_DIR}/.
#
# A per-node copy ${TMP_DIR}/haresources.<node> is kept; when the
# primary node's copy already mentions the first target device the
# function returns immediately.
#
# Returns 0 on success (or when nothing needs doing), 1 on failure.
create_haresources() {
	HARES_PRIMNODE="${TMP_DIR}/haresources.${PRIM_NODENAME}"
	HARES_LUSTRE="${TMP_DIR}/haresources${FILE_SUFFIX}"
	local -i n
	local -i ntargets=${#TARGET_DEVNAMES[@]}
	local -i nnodes=${#NODE_NAMES[@]}
	local group_line

	# Nothing to do if the primary node's file already lists the
	# first target device
	if [ -s ${HARES_PRIMNODE} ] \
	&& [ -n "$(/bin/grep ${TARGET_DEVNAMES[0]} ${HARES_PRIMNODE})" ]; then
		verbose_output "${HARES_PRIMNODE} already exists."
		return 0
	fi

	# Assemble the resource group line: the primary node name followed
	# by one resource entry per target (plus its mon service for hbv1)
	group_line=${PRIM_NODENAME}
	n=0
	while (( n < ntargets )); do
		group_line="${group_line} ${LUSTRE_SRV}::${TARGET_DEVNAMES[n]}::${TARGET_MNTPNTS[n]}::${FS_TYPE}"

		if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
			group_line="${group_line} ${TARGET_SRVNAMES[n]}-mon"
		fi
		n=$(( n + 1 ))
	done
	echo "${group_line}" >> ${HARES_LUSTRE}

	# Generate the cib.xml file
	if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
		# Make sure group haclient and user hacluster exist locally
		if [ -z "$(grep haclient /etc/group)" ]; then
			groupadd haclient
		fi
		if [ -z "$(grep hacluster /etc/passwd)" ]; then
			useradd -g haclient hacluster
		fi

		CIB_LUSTRE="${TMP_DIR}/cib.xml${FILE_SUFFIX}"
		if ! python ${CIB_GEN_SCRIPT} --stdout \
		     ${HARES_LUSTRE} > ${CIB_LUSTRE}; then
			error_output "Failed to generate cib.xml file"\
				 "for node ${PRIM_NODENAME}!"
			return 1
		fi
	fi

	# scp the haresources file or cib.xml file
	n=0
	while (( n < nnodes )); do
		/bin/cp -f ${HARES_LUSTRE} "${TMP_DIR}/haresources."${NODE_NAMES[n]}

		if ! scp ${HARES_LUSTRE} ${NODE_NAMES[n]}:${HA_DIR}/; then
			error_output "Failed to scp haresources file"\
				 "to node ${NODE_NAMES[n]}!"
			return 1
		fi

		if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
			if ! scp ${CIB_LUSTRE} ${NODE_NAMES[n]}:${CIB_DIR}/; then
				error_output "Failed to scp cib.xml"\
					 "file to node ${NODE_NAMES[n]}!"
				return 1
			fi
		fi
		n=$(( n + 1 ))
	done

	return 0
}

# create_authkeys
#
# Copy the authkeys file ${AUTHKEYS_TEMP} to each node's ${HA_DIR}/
#
# The file mode is set to 600 before copying and preserved by "scp -p".
# An empty marker file ${TMP_DIR}/authkeys.<node> records that a node
# has received the file; if the primary node's marker already exists
# nothing is done.
#
# The markers are written only after every copy has succeeded.
# Creating them before scp would make a rerun after a failed copy
# report "already exists" and never distribute the file.
#
# Globals: TMP_DIR, PRIM_NODENAME, AUTHKEYS_TEMP, NODE_NAMES, HA_DIR
#          (read); AUTHKEYS_PRIMNODE (written)
# Returns: 0 on success or when already done, 1 on any failure
create_authkeys() {
	AUTHKEYS_PRIMNODE="${TMP_DIR}/authkeys.${PRIM_NODENAME}"
	declare -i idx

	if [ -e "${AUTHKEYS_PRIMNODE}" ]; then
		verbose_output "${AUTHKEYS_PRIMNODE} already exists."
		return 0
	fi

	# The key file must not be readable by anyone but its owner
	if ! chmod 600 "${AUTHKEYS_TEMP}"; then
		error_output "Failed to set permissions"\
			 "on ${AUTHKEYS_TEMP}!"
		return 1
	fi

	# scp the authkeys file to all the nodes
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		scp -p "${AUTHKEYS_TEMP}" "${NODE_NAMES[idx]}:${HA_DIR}/"
		if [ $? -ne 0 ]; then
			error_output "Failed to scp authkeys file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done

	# Every node has the file now: record that for later runs
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		touch "${TMP_DIR}/authkeys.${NODE_NAMES[idx]}"
	done

	return 0
}

# create_moncf
#
# Create the mon.cf file and scp it to the each node's ${MON_DIR}/
#
# The file is produced by running ${SCRIPT_GEN_MONCF} with one "-n"
# argument per node name and one "-o" argument per target service name,
# covering both the service names found in a previous mon.cf of the
# primary node and those in TARGET_SRVNAMES. A per-node copy
# ${TMP_DIR}/mon.cf.<node> is kept; when the primary node's copy already
# mentions the first target service name the function returns at once.
#
# Globals: TMP_DIR, PRIM_NODENAME, FILE_SUFFIX, TARGET_SRVNAMES,
#          NODE_NAMES, SCRIPT_GEN_MONCF, MON_DIR (read);
#          MONCF_PRIMNODE, MONCF_LUSTRE (written)
# Returns: 0 on success or when nothing needs doing, 1 if the generator
#          script or an scp fails
create_moncf() {
	MONCF_PRIMNODE=${TMP_DIR}$"/mon.cf."${PRIM_NODENAME}
	MONCF_LUSTRE=${TMP_DIR}$"/mon.cf"${FILE_SUFFIX}
	local srv_name params=
	declare -i idx
	declare -a OLD_TARGET_SRVNAMES		# targets in other nodes
						# in this failover group
	# Initialize the OLD_TARGET_SRVNAMES array
	unset OLD_TARGET_SRVNAMES

	if [ -s ${MONCF_PRIMNODE} ]; then
		# A non-empty mon.cf copy exists for the primary node: it is
		# either up to date already or a source of older service names
		if [ -n "`/bin/grep ${TARGET_SRVNAMES[0]} ${MONCF_PRIMNODE}`" ]
		then
			verbose_output "${MONCF_PRIMNODE} already exists."
			return 0
		else
			# Get the Lustre target service names
			# from the previous mon.cf file: take the second field
			# of every "hostgroup" line when it contains "-mon",
			# and strip "-mon" from it
			idx=0
			for srv_name in `grep hostgroup ${MONCF_PRIMNODE}\
					|awk '$2 ~ /-mon/ {print $2}'|xargs`
			do
				OLD_TARGET_SRVNAMES[idx]=`echo ${srv_name}\
							  |sed 's/-mon//g'`
				idx=$(( idx + 1 ))
			done
		fi
	fi

	# Construct the parameters to mon.cf generation script
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		params=${params}" -n "${NODE_NAMES[idx]}
	done

	# Service names recovered from the previous mon.cf come first ...
	for ((idx = 0; idx < ${#OLD_TARGET_SRVNAMES[@]}; idx++)); do
		params=${params}" -o "${OLD_TARGET_SRVNAMES[idx]}
	done

	# ... followed by the service names of the current targets
	for ((idx = 0; idx < ${#TARGET_SRVNAMES[@]}; idx++)); do
		params=${params}" -o "${TARGET_SRVNAMES[idx]}
	done

	# ${params} is deliberately left unquoted so that it word-splits
	# into separate arguments
	${SCRIPT_GEN_MONCF} ${params}
	if [ $? -ne 0 ]; then
		error_output "Failed to generate mon.cf file"\
			 "by using ${SCRIPT_GEN_MONCF}!"
		return 1
	fi

	# The glob has no directory part, so it is resolved against the
	# current working directory.
	# NOTE(review): presumably the generator leaves exactly one
	# "*-mon.cfg" file there - confirm; the result of mv is not checked.
	/bin/mv *-mon.cfg ${MONCF_LUSTRE}

	# scp the mon.cf file to all the nodes
	for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
		# Keep a per-node copy in ${TMP_DIR} for later runs
		/bin/cp -f ${MONCF_LUSTRE} ${TMP_DIR}$"/mon.cf."${NODE_NAMES[idx]}

		scp ${MONCF_LUSTRE} ${NODE_NAMES[idx]}:${MON_DIR}/
		if [ $? -ne 0 ]; then
			error_output "Failed to scp mon.cf file"\
				 "to node ${NODE_NAMES[idx]}!"
			return 1
		fi
	done

	return 0
}

# generate_config
#
# Produce every Heartbeat configuration file and distribute it to all
# the nodes, in this order: ha.cf, haresources, authkeys and, for
# Heartbeat v1 only, mon.cf. For Heartbeat v1 the target server names
# are collected first.
#
# Returns 1 as soon as any step fails, 0 when all of them succeed.
generate_config() {
	# Server names of Lustre targets are only collected for hbv1
	if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
		get_srvnames || return 1
	fi

	create_template || return 1

	verbose_output "Creating and remote copying ha.cf${FILE_SUFFIX} file to"\
		       "${PRIM_NODENAME} failover group hosts..."
	create_hacf || return 1
	verbose_output "OK"

	verbose_output "Creating and remote copying haresources${FILE_SUFFIX} file"\
		       "to ${PRIM_NODENAME} failover group hosts..."
	create_haresources || return 1
	verbose_output "OK"

	verbose_output "Creating and remote copying authkeys${FILE_SUFFIX} file to" \
		       "${PRIM_NODENAME} failover group hosts..."
	create_authkeys || return 1
	verbose_output "OK"

	# mon.cf is generated for hbv1 only
	[ "${HBVER_OPT}" = "${HBVER_HBV1}" ] || return 0

	verbose_output "Creating and remote copying mon.cf${FILE_SUFFIX} file to" \
			"${PRIM_NODENAME} failover group hosts..."
	create_moncf || return 1
	verbose_output "OK"

	return 0
}

# Main flow
# Collect the node names of the failover group
get_nodenames || exit 1

# Check the heartbeat service on every node of the group
verbose_output "Checking heartbeat service in the ${PRIM_NODENAME}"\
	       "failover group hosts..."
check_heartbeat
rc=$?
case "$rc" in
2)
	# The operator declined to stop a running service: nothing is
	# generated, and this is not treated as an error
	verbose_output "OK"
	exit 0
	;;
1)
	exit 1
	;;
esac
verbose_output "OK"

# Generate the configuration files and distribute them
generate_config || exit 1

exit 0
