#!/bin/bash
#
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
#
# lc_md - configure Linux MD devices from a csv file
#
################################################################################

# usage
#
# Print the command line synopsis of this script on stderr and terminate
# the script with exit status 1.
usage() {
    # "$0" is quoted so that a script path containing blanks still yields
    # the right program name
    cat >&2 <<EOF

Usage:  $(basename "$0") [options] <csv file>

    This script is used to configure Linux MD devices in a Lustre cluster
    from a csv file.

    Options:
    -a          select all the nodes from the csv file to operate on
    -w hostname,hostname,...
                select the specified list of nodes (separated by commas)
    -x hostname,hostname,...
                exclude the specified list of nodes (separated by commas)
    -h          help and examples
    -v          verbose mode
    csv file    a spreadsheet that contains configuration parameters
                (separated by commas) for each Linux MD device to be
                configured in a Lustre cluster

EOF
    exit 1
}

# sample
#
# Show the csv line format understood by this script, followed by a worked
# example, on stdout and leave the script with exit status 0.
sample() {
    # The text contains nothing that has to be expanded, hence the quoted
    # here-document delimiter
    cat <<'EOF'

This script is used to configure Linux MD devices in a Lustre cluster
from a csv file.

Each line marked with "MD" in the csv file represents one MD device.
The format is:
hostname,MD,md name,operation mode,options,raid level,component devices

hostname            hostname of the node in the cluster
MD                  marker of MD device line
md name             MD device name, e.g. /dev/md0
operation mode      create or remove, default is create
options             a "catchall" for other mdadm options, e.g. "-c 128"
raid level          raid level: 0,1,4,5,6,10,linear and multipath
component devices   block devices to be combined into the MD device
                    Multiple devices are separated by space or by using
                    shell expansions, e.g. "/dev/sd{a,b,c}"

Items left blank will be set to defaults.

Example:
-------------------------------------------------------
# MD devices on mgsnode
mgsnode,MD,/dev/md0,,-q -c 32,1,/dev/sda1 /dev/sdb1
mgsnode,MD,/dev/md1,,-q -c 32,1,/dev/sdc1 /dev/sdd1
mgsnode,MD,/dev/md2,,-q -c 32,0,/dev/md0 /dev/md1

# MD device on ostnode
ostnode,MD,/dev/md0,,-q -c 128,5,"/dev/sd{a,b,c,d,e}"
-------------------------------------------------------

EOF
    exit 0
}

# Get the library of functions
. /usr/lib/lustre/lc_common

#***************************** Global variables *****************************#
# Items of the MD device lines taken from the csv file, one array per csv
# column; get_md_items() stores each accepted line under one shared index
declare -a HOST_NAME
declare -a MD_NAME
declare -a OP_MODE
declare -a OP_OPTS
declare -a RAID_LEVEL
declare -a MD_DEVS

# Bookkeeping for the remote commands started in the background: the
# command text, the process id, and the integer counter indexing both
declare -a REMOTE_CMD REMOTE_PID
declare -i pid_num=0


# Verbose messages stay off unless -v is given
VERBOSE_OUTPUT=false

# Walk through the command line options
while getopts "aw:x:hv" OPTION; do
    case "${OPTION}" in
    a)
        # -a only takes effect while no -w/-x node list has been recorded
        if [ -z "${SPECIFIED_NODELIST}" ] && [ -z "${EXCLUDED_NODELIST}" ]; then
            USE_ALLNODES=true
        fi
        ;;
    w)
        SPECIFIED_NODELIST=$OPTARG
        USE_ALLNODES=false
        ;;
    x)
        EXCLUDED_NODELIST=$OPTARG
        USE_ALLNODES=false
        ;;
    h)
        sample
        ;;
    v)
        VERBOSE_OUTPUT=true
        ;;
    ?)
        # Anything else that getopts hands back ends up here
        usage
        ;;
    esac
done

# Drop the arguments that getopts has consumed
shift $((OPTIND - 1))

# The first remaining argument has to be the csv file
if [ $# -lt 1 ]; then
    error_output "Missing csv file!"
    usage
fi

CSV_FILE=$1

# check_md_item index
#
# Verify that the mandatory items of the MD device stored at $index of the
# global item arrays are filled in.
#
# A hostname is always required. In create mode (operation mode "create" or
# blank) the md name, the raid level and the component devices are required
# as well. Problems are reported through error_output().
#
# Returns 0 if nothing is missing, 1 otherwise (also when $index is absent).
check_md_item() {
    if [ $# -lt 1 ]; then
        error_output "check_md_item():"\
                 "Missing argument!"
        return 1
    fi

    declare -i n=$1

    # A host is needed whatever the operation mode is
    if [[ -z ${HOST_NAME[n]} ]]; then
        error_output "check_md_item():"\
                 "hostname item has null value!"
        return 1
    fi

    # Only create mode, which is also the default, has further mandatory items
    case "${OP_MODE[n]}" in
    "" | create)
        ;;
    *)
        return 0
        ;;
    esac

    if [[ -z ${MD_NAME[n]} ]]; then
        error_output "check_md_item():"\
        "md name item has null value!"
        return 1
    fi

    if [[ -z ${RAID_LEVEL[n]} ]]; then
        error_output "check_md_item():"\
        "raid level item of MD device ${MD_NAME[n]} has null value!"
        return 1
    fi

    if [[ -z ${MD_DEVS[n]} ]]; then
        error_output "check_md_item():"\
        "component devices item of ${MD_NAME[n]} has null value!"
        return 1
    fi

    return 0
}

# get_md_items csv_file
#
# Read $csv_file line by line and store every accepted MD device line in the
# global arrays HOST_NAME, MD_NAME, OP_MODE, OP_OPTS, RAID_LEVEL and MD_DEVS
# (one shared index per device, starting at 0).
#
# A line is skipped when it is blank, when it contains a "#" at its start or
# right after white space, when its second field differs from ${MD_MARKER},
# or - unless ${USE_ALLNODES} is true - when host_in_hostlist() rejects its
# first field.
#
# Returns 0 on success and 1 when the argument is missing, the file cannot be
# read, parse_line() fails or check_md_item() finds a required item missing.
get_md_items() {
    # Check argument
    if [ $# -eq 0 ]; then
        error_output "get_md_items(): Missing csv file!"
        return 1
    fi

    local CSV_FILE=$1
    local LINE
    local hostname
    local comment_re='(^|[[:space:]])#'
    declare -i line_num=0
    declare -i idx=0

    # Fail loudly instead of reporting success for a file that was never read
    if [ ! -r "${CSV_FILE}" ]; then
        error_output "get_md_items(): Cannot read csv file ${CSV_FILE}!"
        return 1
    fi

    # The "|| [ -n ... ]" part keeps a last line without a trailing newline
    while read -r LINE || [ -n "${LINE}" ]; do
        line_num=$((line_num + 1))

        # Skip the blank line and the comment line
        if [ -z "${LINE}" ] || [[ ${LINE} =~ ${comment_re} ]]; then
            continue
        fi

        # Skip the non-MD line; the line is quoted so that it is neither
        # word-split nor matched against local file names
        [ "$(printf '%s\n' "${LINE}" | cut -d, -f 2)" != "${MD_MARKER}" ] \
        && continue

        # Skip the host which is not specified in the host list
        if ! ${USE_ALLNODES}; then
            hostname=$(printf '%s\n' "${LINE}" | cut -d, -f 1)
            # NOTE(review): the arguments are passed unquoted as before; the
            # argument contract of host_in_hostlist() is not visible here
            ! host_in_hostlist ${hostname} ${NODES_TO_USE} && continue
        fi

        # Parse the config line into CONFIG_ITEM
        if ! parse_line "$LINE"; then
            return 1
        fi

        HOST_NAME[idx]=${CONFIG_ITEM[0]}
        MD_NAME[idx]=${CONFIG_ITEM[2]}
        OP_MODE[idx]=${CONFIG_ITEM[3]}
        OP_OPTS[idx]=${CONFIG_ITEM[4]}
        RAID_LEVEL[idx]=${CONFIG_ITEM[5]}
        MD_DEVS[idx]=${CONFIG_ITEM[6]}

        # Check some required items
        if ! check_md_item $idx; then
            error_output "check_md_item():"\
                     "Occurred on line ${line_num} in ${CSV_FILE}."
            return 1
        fi

        idx=$((idx + 1))
    done < "${CSV_FILE}"

    return 0
}

# md_is_active host_name md_name
#
# Run a remote command to check whether $md_name has an entry in
# /proc/mdstat on $host_name.
#
# Returns 0 if the array is listed (active), 1 if it is not listed, and 2 if
# the remote command failed and printed something; that output is reported
# through error_output().
md_is_active() {
    local host_name=$1
    local md_name=$2
    local cmd ret_str
    local rc

    # Match the array name at the start of a line and followed by a blank,
    # so that e.g. "md1" is not mistaken for "md10"
    cmd="grep -q '^${md_name##*/} ' /proc/mdstat 2>&1"
    ret_str=$(${REMOTE} "${host_name}" "${cmd}" 2>&1)
    rc=$?
    if [ ${rc} -ne 0 ]; then
        # grep -q is silent, so any output means the remote command itself
        # went wrong
        if [ -n "${ret_str}" ]; then
            error_output "md_is_active():"\
            "remote command to ${host_name} error: ${ret_str}!"
            return 2    # Error occurred
        else
            return 1    # inactive
        fi
    fi

    return 0            # active
}

# construct_mdadm_create_cmdline index
#
# Construct the create operation command line of mdadm for ${MD_NAME[index]}
# and print it on stdout.
#
# If the options item already carries -n or --raid-devices, the component
# devices are simply appended. Otherwise the number of raid devices is
# computed as <number of component devices> - <number of spare devices>;
# the component devices are counted by a command run on ${HOST_NAME[index]}
# so that shell patterns in the item are expanded by the remote shell.
#
# Returns 0 on success. On failure the error text is printed on stdout (the
# caller captures it together with the status) and 1 is returned.
construct_mdadm_create_cmdline() {
    declare -i i=$1
    local cmd_line
    local opts count_cmd line spare_arg rest
    declare -i alldisks=0
    declare -i raiddisks=0
    declare -i sparedisks=0

    # The leading blank lets the " -n"/" -x" patterns below also match an
    # option which is the very first word of the options item
    opts=" ${OP_OPTS[i]}"

    cmd_line="${MDADM} -C -R ${MD_NAME[i]} ${OP_OPTS[i]} -l ${RAID_LEVEL[i]}"

    if [[ ${opts} == *" -n"* || ${opts} == *"--raid-devices"* ]]; then
        cmd_line="${cmd_line} ${MD_DEVS[i]}"
        # Unquoted on purpose to squeeze blanks; set -f keeps device
        # patterns from being expanded against the local file system
        (set -f; echo ${cmd_line})
        return 0
    fi

    # Get the number of component devices in the array; \$disk is escaped
    # so that it is expanded by the remote shell, one output line per device
    count_cmd="for disk in ${MD_DEVS[i]}; do echo \$disk; done"
    while read -r line; do
        alldisks=$((alldisks + 1))
    done < <(${REMOTE} "${HOST_NAME[i]}" "${count_cmd}")

    if [ ${alldisks} -eq 0 ]; then
        echo "$(basename "$0"): construct_mdadm_create_cmdline() error:"\
        "Failed to execute remote command to get the number of"\
        "component devices of array ${MD_NAME[i]} from host ${HOST_NAME[i]}!"
        return 1
    fi

    # Get the specified number of spare (eXtra) devices; both
    # "--spare-devices=N" and "--spare-devices N" are understood
    if [[ ${opts} == *" -x"* ]]; then
        spare_arg=${opts##* -x}
    elif [[ ${opts} == *"--spare-devices"* ]]; then
        spare_arg=${opts##*--spare-devices}
    fi
    spare_arg=${spare_arg#=}
    # Keep the first word only
    read -r spare_arg rest <<< "${spare_arg}"
    # Anything that is not a plain number is left for mdadm to complain about
    if [[ ${spare_arg} =~ ^[0-9]+$ ]]; then
        sparedisks=$((10#${spare_arg}))
    fi

    # Get the number of raid devices in the array
    # The number of raid devices in the array plus the number of spare devices
    # listed on the command line must equal the number of component devices
    # (including "missing" devices).
    raiddisks=$((alldisks - sparedisks))

    if [ ${raiddisks} -lt 1 ]; then
        echo "$(basename "$0"): construct_mdadm_create_cmdline() error:"\
        "Invalid number of raid devices in array ${MD_NAME[i]}: ${raiddisks}!"\
        "Check the number of spare devices and whether all the component devices"\
        "\"${MD_DEVS[i]}\" (except \"missing\" devices) exist in host ${HOST_NAME[i]}!"
        return 1
    fi

    cmd_line="${cmd_line} -n ${raiddisks} ${MD_DEVS[i]}"

    (set -f; echo ${cmd_line})
    return 0
}

# construct_mdadm_rm_cmdline index
#
# Construct the remove operation command line of mdadm for ${MD_NAME[index]}
# and print it on stdout: the array is stopped and, if component devices
# are given, their superblocks are zeroed afterwards (a failure of that
# second step is ignored by the generated command).
#
# Always returns 0.
construct_mdadm_rm_cmdline() {
    declare -i i=$1
    local mdadm_cmd
    local real_devs

    # Deactivate the MD array, releasing all resources
    mdadm_cmd="${MDADM} -S ${MD_NAME[i]}"

    if [ -n "${MD_DEVS[i]}" ]; then
        # Remove the "missing" devices from the component devices; the item
        # is quoted so that device patterns reach the remote host untouched
        real_devs=$(printf '%s\n' "${MD_DEVS[i]}" | sed 's/missing//g')
        # Overwrite the superblock with zeros
        mdadm_cmd="${mdadm_cmd} && ${MDADM} --zero-superblock ${real_devs} || true"
    fi

    # Unquoted on purpose to squeeze blanks; set -f keeps device patterns
    # from being expanded against the local file system
    (set -f; echo ${mdadm_cmd})
    return 0
}

# construct_mdadm_cmdline host_name
#
# Assemble in the global MDADM_CMDLINE the mdadm commands for all the csv
# entries that belong to $host_name, joined by " && ".
#
# Per entry, depending on its operation mode:
#   blank/create  the create command line; if md_is_active() reports the
#                 array as active the user is asked first, and unless the
#                 answer is "n" the remove command line is put in front
#   remove        the remove command line, or - when no md name is given
#                 and the user does not answer "n" - a loop stopping every
#                 array listed in /proc/mdstat
#   other         "${MDADM} <mode> <md name> <options> <component devices>"
# Answers are read from stdin; an entry answered with "n" is left out.
#
# Returns 0 on success, 1 if md_is_active() returns 2 or the create command
# line cannot be constructed.
construct_mdadm_cmdline() {
    MDADM_CMDLINE=
    local host_name=$1
    local stop_part one_cmd
    local active_rc reply
    declare -i n

    for ((n = 0; n < ${#HOST_NAME[@]}; n++)); do
        # Entries of other hosts are not handled here
        [ "${host_name}" = "${HOST_NAME[n]}" ] || continue

        stop_part=
        one_cmd=

        case "${OP_MODE[n]}" in
        "" | create)
            # Find out whether the array is running at the moment
            md_is_active ${host_name} ${MD_NAME[n]}
            active_rc=$?
            case "${active_rc}" in
            2)
                return 1
                ;;
            0)
                reply=
                printf '%s' "$(basename $0): ${MD_NAME[n]} is active on " \
                    "${host_name}, go ahead to deactivate it and create " \
                    "the new array? [y/n]:"
                read reply
                if [ "${reply}" = "n" ]; then
                    printf '%s\n' "$(basename $0): ${MD_NAME[n]} on host ${host_name} remains as it is."
                    continue
                fi

                # The running array has to be taken down first
                stop_part=$(construct_mdadm_rm_cmdline ${n})
                ;;
            esac

            # On failure the captured text is the error message
            if ! one_cmd=$(construct_mdadm_create_cmdline ${n}); then
                error_output "${one_cmd}"
                return 1
            fi

            if [ -n "${stop_part}" ]; then
                one_cmd="${stop_part} && ${one_cmd}"
            fi
            ;;
        remove)
            if [ -n "${MD_NAME[n]}" ]; then
                # One particular array is to be removed
                one_cmd=$(construct_mdadm_rm_cmdline ${n})
            else
                reply=
                printf '%s' "$(basename $0): Do you really want to remove " \
                    "all the MD devices in the host ${HOST_NAME[n]}? [y/n]:"
                read reply
                if [ "${reply}" = "n" ]; then
                    printf '%s\n' "$(basename $0): MD devices on host ${HOST_NAME[n]} remain as they are."
                    continue
                fi

                # Teardown: stop every array found in /proc/mdstat; \$md is
                # left for the remote shell to expand
                one_cmd="(cat /proc/mdstat | egrep \"^md[[:digit:]]\" |"
                one_cmd+=" while read md rest; do ${MDADM} -S /dev/\$md; done)"
            fi
            ;;
        *)
            # Any other mode is handed to mdadm as it is
            one_cmd="${MDADM} ${OP_MODE[n]} ${MD_NAME[n]} ${OP_OPTS[n]} ${MD_DEVS[n]}"
            ;;
        esac

        # Chain the command of this entry to what has been collected so far
        MDADM_CMDLINE="${MDADM_CMDLINE:+${MDADM_CMDLINE} && }${one_cmd}"
    done

    return 0
}

# config_md_devs host_name
#
# Build the mdadm command line for $host_name and, if there is anything to
# do, start it on that host in the background through ${REMOTE}. The command
# text and the process id of the job are recorded in REMOTE_CMD and
# REMOTE_PID at index pid_num, which is incremented afterwards.
#
# Returns 1 if the command line cannot be constructed, 0 otherwise; the
# outcome of the background job itself is not examined here.
config_md_devs() {
    local host_name=$1

    # Nothing can be run without a command line
    construct_mdadm_cmdline ${host_name} || return 1

    if [ -n "${MDADM_CMDLINE}" ]; then
        verbose_output "Configuring MD devices in host ${host_name}..."
        verbose_output "Configure command line is: \"${MDADM_CMDLINE}\""

        # Remember what is started so that a failure can be reported later
        REMOTE_CMD[pid_num]="${REMOTE} ${host_name} \"${MDADM_CMDLINE}\""
        # \$PATH is escaped: it is extended on the remote side
        $REMOTE $host_name "export PATH=\$PATH:/sbin:/usr/sbin; $MDADM_CMDLINE" &
        REMOTE_PID[pid_num]=$!
        pid_num=$((pid_num + 1))
        sleep 1
    else
        verbose_output "There are no MD devices on host ${host_name}"\
        "needed to be configured."
    fi

    return 0
}

# config_md
#
# Configure the MD devices of every host found in HOST_NAME: each distinct
# host is handed to config_md_devs() once (the hosts already dealt with are
# collected in NODE_NAME), then all the background jobs recorded in
# REMOTE_PID are waited for.
#
# Returns 0 if there was nothing to do or every job succeeded, 1 if
# config_md_devs() failed or at least one job exited with a non-zero status.
config_md() {
    declare -i n=0
    declare -i node_cnt=0       # Next free slot of the NODE_NAME array
    local node
    local job_failed

    # Start with an empty list of handled hosts
    unset NODE_NAME

    for ((n = 0; n < ${#HOST_NAME[@]}; n++)); do
        node=${HOST_NAME[n]}

        # A host may own several csv lines but is configured only once
        if configured_host ${node}; then
            continue
        fi

        NODE_NAME[node_cnt]=${node}
        node_cnt=$((node_cnt + 1))

        # Start the remote command which configures the MD devices of $node
        config_md_devs ${node} || return 1
    done

    if [ ${#HOST_NAME[@]} -eq 0 ] || [ ${#REMOTE_PID[@]} -eq 0 ]; then
        verbose_output "There are no MD devices to be configured."
        return 0
    fi

    # Collect the exit status of every background remote command
    verbose_output "Waiting for the return of the remote command..."
    job_failed=false
    for ((pid_num = 0; pid_num < ${#REMOTE_PID[@]}; pid_num++)); do
        if ! wait ${REMOTE_PID[pid_num]}; then
            error_output "config_md(): Failed"\
                 "to execute \"${REMOTE_CMD[pid_num]}\"!"
            job_failed=true
        fi
    done

    if [ "${job_failed}" = "true" ]; then
        return 1
    fi

    verbose_output "All the MD devices are configured successfully!"
    return 0
}

# Main flow
# Check the csv file; the name is quoted so that a path containing blanks
# is passed on as a single argument
check_file "${CSV_FILE}" || exit ${PIPESTATUS[0]}

# Get the list of nodes to be operated on; on failure the captured output
# is handed to error_exit together with the exit status
NODES_TO_USE=$(get_nodelist) || error_exit ${PIPESTATUS[0]} "$NODES_TO_USE"

# Check the node list
check_nodelist ${NODES_TO_USE} || exit 1

# Get all the MD device items from the csv file
if ! get_md_items "${CSV_FILE}"; then
    exit 1
fi

# Configure the MD devices
if ! config_md; then
    exit 1
fi

exit 0
