#!/bin/bash

# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:

#
# lc_net - script for Lustre cluster network verification
#
###############################################################################

# usage
# Print the command-line synopsis on stderr and terminate the script.
# Takes no arguments and never returns: it always exits with status 1.
usage() {
	# ${0##*/} removes the directory part of the script path without
	# forking basename, and stays correct when the path holds whitespace.
	cat >&2 <<EOF

Usage:	${0##*/} [options] <csv file>

	Options:
	-a		select all the nodes from the csv file to operate on
	-w hostname,hostname,...
			select the specified list of nodes (separated by commas)
	-x hostname,hostname,...
			exclude the specified list of nodes (separated by commas)
	-v		verbose mode
	csv file	a spreadsheet that contains configuration parameters
			(separated by commas) for each target in a Lustre cl-
			uster, the first field of each line is the host name
			of the cluster node

EOF
	exit 1
}

# Get the library of functions
. /usr/lib/lustre/lc_common

# Parse the command-line options:
#   -v       switch verbose output on
#   -w list  remember the nodes to operate on
#   -x list  remember the nodes to leave out
#   -a       operate on all nodes, but only while neither the -w nor the
#            -x node list holds a value
# Anything getopts rejects is answered with the usage text.
VERBOSE_OUTPUT=false
while getopts "aw:x:v" OPTION; do
	case "${OPTION}" in
	v)
		VERBOSE_OUTPUT=true
		;;
	w)
		SPECIFIED_NODELIST=${OPTARG}
		USE_ALLNODES=false
		;;
	x)
		EXCLUDED_NODELIST=${OPTARG}
		USE_ALLNODES=false
		;;
	a)
		# Both lists empty <=> their concatenation is empty
		if [ -z "${SPECIFIED_NODELIST}${EXCLUDED_NODELIST}" ]; then
			USE_ALLNODES=true
		fi
		;;
	*)
		usage
		;;
	esac
done

# Discard the options getopts has already consumed
shift $((OPTIND - 1))

# The first remaining operand must be the csv file
if (( $# < 1 )); then
	error_output "Missing csv file!"
	usage
fi

# Script-wide state
#   HOST_NAMES    names of the hosts to operate on
#   HOST_IPADDRS  per-host result of the local ping, same index as HOST_NAMES
#   CSV_FILE      csv file named on the command line
declare -a HOST_NAMES HOST_IPADDRS
CSV_FILE=$1

# get_hostnames
# Fill the global HOST_NAMES array with the hosts to be operated on.
# The node list is obtained from get_nodelist (not defined in this file;
# presumably provided by the sourced lc_common library) and is split on
# commas and whitespace.
# Returns 0 on success, 1 if get_nodelist fails or yields no host.
get_hostnames() {
	local NODES_TO_USE

	# Initialize the HOST_NAMES array
	unset HOST_NAMES

	# Get the list of nodes to be operated on. On failure whatever
	# get_nodelist printed is reported as the error message. The failure
	# is returned unconditionally, independent of error_output's own
	# exit status.
	if ! NODES_TO_USE=$(get_nodelist); then
		error_output "${NODES_TO_USE}"
		return 1
	fi

	# Check the node list
	if [ -z "${NODES_TO_USE}" ]; then
		echo "${0##*/}: There are no hosts to be operated on."\
		"Check the node selection options (-a, -w or -x)."
		return 1
	fi

	# Load the hostnames in the nodelist into the array.
	# read -a splits the words without pathname expansion, so a name is
	# never replaced by matching file names. With -d '' read consumes the
	# whole string and signals end-of-input with a non-zero status, which
	# is expected here and therefore ignored.
	read -r -d '' -a HOST_NAMES <<< "${NODES_TO_USE//,/ }" || true

	return 0
}

# ping_host host_name
# Check whether host $host_name is reachable.
# Output:  on success, the IP address taken from ping's header line;
#          on failure, an error message. Both go to stdout, because the
#          caller captures stdout and decides how to report it.
# Returns: 0 if the host answered, 1 otherwise (including a missing name).
ping_host() {
	local host_name=$1
	local ip_addr=
	local ret_str

	if [ -z "${host_name}" ]; then
		echo "${0##*/}: ping_host() error: Missing hostname!"
		return 1
	fi

	# Run ping command: one echo request, keeping stdout and stderr
	if ! ret_str=$(ping -c1 "${host_name}" 2>&1); then
		if [ -n "${ret_str}" ]; then
			echo "${0##*/}: ping_host() error: ${ret_str}!"
		else
			echo "${0##*/}: ping_host() error:"\
			"Host ${host_name} does not respond to ping!"
		fi
		return 1
	fi

	# Get the IP address: it is the third word of the header line, e.g.
	#   PING node1 (192.168.1.5) 56(84) bytes of data.
	#   PING node1 (192.168.1.5): 56 data bytes
	# Strip the opening parenthesis and everything from the closing one
	# onwards, so that a ':' following ')' does not end up in the address.
	ip_addr=$(awk 'NR == 1 {
			sub(/^\(/, "", $3)
			sub(/\).*$/, "", $3)
			print $3
			exit
		}' <<< "${ret_str}")

	echo "${ip_addr}"
	return 0
}

# local_check index
# Check the network connectivity between local host and ${HOST_NAMES[index]}.
# On success the IP address printed by ping_host is stored in
# ${HOST_IPADDRS[index]} and 0 is returned. On failure the text printed by
# ping_host is passed to error_output and 1 is returned.
local_check() {
	declare -i i=$1

	# Check whether ${HOST_NAMES[i]} is reachable
	# and get the IP address of this host from ping.
	# The name is quoted so that ping_host always receives exactly one
	# argument, whatever characters the name holds.
	if ! HOST_IPADDRS[i]=$(ping_host "${HOST_NAMES[i]}"); then
		# ping_host wrote its error message to stdout, so that message
		# is what HOST_IPADDRS[i] holds at this point
		error_output "${HOST_IPADDRS[i]}"
		return 1
	fi

	return 0
}

# remote_check index
# Check whether ${HOST_NAMES[index]} can resolve its own name and whether
# this host agrees with the local host about what its name is resolved to.
# Globals read: REMOTE (remote execution command), HOST_NAMES, and
#               HOST_IPADDRS[index] (the address found by the local ping).
# Returns 0 if both addresses agree. Returns 1, after reporting through
# error_output, if the remote command fails, prints nothing, or reports a
# different address.
remote_check() {
	declare -i i=$1
	local cmd ret_str
	local ip_addr=		# the IP address got from remote ping
	local -i rc=0
	# Dotted-quad matcher; local so that it does not leak to the caller
	local ip_pattern="[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}"
	# The same address wrapped in parentheses, as in ping's header line
	local paren_pattern="\((${ip_pattern})\)"

	# Execute remote command to check whether ${HOST_NAMES[i]}
	# can resolve its own name.
	# ${REMOTE} is deliberately left unquoted so that a command with
	# options splits into words (NOTE(review): REMOTE is not set in this
	# file, presumably by lc_common - confirm).
	cmd="ping -c1 ${HOST_NAMES[i]} 2>&1"
	ret_str=$(${REMOTE} "${HOST_NAMES[i]}" "${cmd}" 2>&1) || rc=$?
	if [[ ${rc} -ne 0 && -n "${ret_str}" ]]; then
		error_output "remote_check():"\
		"remote to ${HOST_NAMES[i]} error: ${ret_str}!"
		return 1
	fi

	if [ -z "${ret_str}" ]; then
		error_output "remote_check():"\
		"No results from ${HOST_NAMES[i]}! Check the network"\
		"connectivity between local host and ${HOST_NAMES[i]}!"
		return 1
	fi

	# Get the IP address of ${HOST_NAMES[i]} from its own ping.
	# Exactly one address is extracted: the first one in parentheses
	# (the resolved address in ping's header), else the first bare one.
	# Taking every match on the line would yield two addresses when the
	# host is named by its IP ("PING 10.0.0.1 (10.0.0.1) ...").
	if [[ ${ret_str} =~ ${paren_pattern} ]]; then
		ip_addr=${BASH_REMATCH[1]}
	elif [[ ${ret_str} =~ ${ip_pattern} ]]; then
		ip_addr=${BASH_REMATCH[0]}
	fi

	# Compare IP addresses
	# Check whether ${HOST_NAMES[i]} agrees with the local host
	# about what its name is resolved to.
	if [ "${ip_addr}" != "${HOST_IPADDRS[i]}" ]; then
		error_output "remote_check():"\
		"Local host resolves ${HOST_NAMES[i]} to IP address"\
		"\"${HOST_IPADDRS[i]}\", while its own resolution is"\
		"\"${ip_addr}\". They are not the same!"
		return 1
	fi

	return 0
}

# network_verify
# Verify name resolution and network connectivity of the Lustre cluster.
# For every host in HOST_NAMES other than the local one, run local_check
# followed by remote_check and stop at the first failure.
# Returns 0 if every check passed, 1 if get_hostnames or any check failed.
network_verify() {
	declare -i i
	local local_host

	# Initialize the HOST_IPADDRS array
	unset HOST_IPADDRS

	# Get all the host names to be operated on
	get_hostnames || return 1

	# The local host name is the same for every iteration, so hostname
	# is run once here instead of up to twice per node inside the loop
	local_host=$(hostname)

	# Check the network connectivity between local host
	# and other cluster nodes
	for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
		# Nothing to verify between the local host and itself
		[ "${HOST_NAMES[i]}" = "${local_host}" ] && continue

		verbose_output "Verifying network connectivity between"\
			       "\"${local_host}\" and \"${HOST_NAMES[i]}\"..."
		local_check "$i" || return 1
		remote_check "$i" || return 1
		verbose_output "OK"
	done

	return 0
}

# Main flow
# Validate the csv file first. check_file is not defined in this file
# (presumably it comes from the sourced lc_common library).
# The path is passed quoted so that a name containing whitespace or glob
# characters arrives as one argument. The ${VAR:+...} form keeps the
# previous behaviour for an empty CSV_FILE: check_file is then called
# without any argument.
if ! check_file ${CSV_FILE:+"${CSV_FILE}"}; then
	exit 1
fi

# Cluster network verification
if ! network_verify; then
	exit 1
fi

exit 0
