# Hide the terminal cursor for the duration of the run; ct_on_exit switches it
# back on.
setterm -cursor off

# Stop the terminal from echoing control characters, so that pressing Ctrl-C
# does not print "^C" in the middle of the output.
stty -echoctl # hide ^C

# Exit handler: restore the terminal, clean up, optionally notify, then exit.
# Installed through `trap` (in which case it receives no argument) and also
# called directly as `ct_on_exit <status>`.
# Globals:   _CT_AVG_FILE (read) - temporary samples file, removed when set
# Arguments: $1 - optional exit status; a value greater than 0 triggers
#                 ct_notify before exiting
# Returns:   never; exits with $1, or with whatever a bare `exit` yields when
#            no status was passed
ct_on_exit() {
    setterm -cursor on
    # Files created by mktemp are not removed automatically and the samples
    # are of no use once the script stops.
    if [ -n "${_CT_AVG_FILE:-}" ]; then
      rm -f -- "$_CT_AVG_FILE"
    fi
    if [ "$#" -gt 0 ] && [ "$1" -gt 0 ]; then
      # "$0" is quoted so that a script path containing spaces keeps its name
      ct_notify "$(basename -- "$0") Finish with errors"
    fi
    # Not a plain "$1" on purpose: without a status this must stay a bare
    # `exit`, whereas `exit ""` would be rejected as a non-numeric argument.
    exit ${1:+"$1"}
}

# Run ct_on_exit when the shell exits and when it receives HUP, INT or TERM.
# The handler is invoked without arguments from here.
trap 'ct_on_exit' EXIT HUP INT TERM

# Baseline: remember which OSDs are already reported down at start-up.
# First column of every "osd." line, sorted numerically into /tmp/OSDs_Down.
ceph osd tree down \
  | awk '/osd\./ { print $1 }' \
  | sort -n -o /tmp/OSDs_Down

# Default settings; ct_help overrides them from the command line.
CT_SCRIPT_NAME=$0
CT_MAX_REMAPPED=64   # remapped-PG threshold used by _ct_test_remapped
CT_OSD_DOWN=0        # 1 enables the check done by _ct_test_osd_down
CT_MAX_TEMP=70       # compared against temp1_input / 1000 in _ct_test_temp

# Option reference, one entry per option accepted by ct_help.
# $CT_MAX_REMAPPED is expanded once, here, so the text shows the built-in
# default. -w is a plain flag: ct_help consumes no value for it.
CT_HELP_MSG="
  -h, --help                show this help message and exit
  -M MAX_REMAPPED, --max_remapped MAX_REMAPPED
                            Increasing PG_NUM waits until there is less 'MAX_REMAPPED' in progress to resume,
                          default: $CT_MAX_REMAPPED
  -d, --osd_down            Suspend operation when an OSD is down
  -k, --exit_on_critical    Exit script if critical event
  -w, --wait_for_nobackfill
                            Wait to start until the cluster has no more backfill
  -t MAX_TEMP, --max_temp MAX_TEMP
                            Suspend operation if serveur Temp >= MAX_TEMP
  -S SCHEDULE, --schedule SCHEDULE
                            Schedule, eg : 21-07 for 21h to 07h or 08-16 for 8h to 16h
"


# Sample option string.
CT_HELP_EXAMPLE="-M 100 -d -k -w -t 45 -S 21-07"

# Short synopsis; the spellings match what ct_help actually accepts.
CT_HELP_DESCR="[-M|--max_remapped MAX_REMAPPED] [-w|--wait_for_nobackfill] [-d|--osd_down]"

# Bail out with the help text when too few arguments are available.
# Arguments: $1 - number of arguments at hand
#            $2 - minimum number required
# When $1 is lower than $2, help_msg is printed and ct_on_exit 1 is called;
# otherwise nothing happens.
ct_help_min () {
    [ "$1" -lt "$2" ] || return 0
    help_msg
    ct_on_exit 1
}

# Parse the options listed in CT_HELP_MSG and store them in CT_* globals.
# Parsing stops at the first word that is not a known option.
# Globals:   CT_MAX_REMAPPED, CT_OSD_DOWN, CT_EXIT_ON_ERROR, CT_MAX_TEMP,
#            CT_WAIT, CT_SCHEDULE (written)
#            _shift (written) - number of words consumed, kept global as before
# Arguments: the command-line words to examine
# Returns:   the number of words consumed, as the return status
#            (presumably meant for the caller to `shift` by - confirm there)
# An option that needs a value but is the last word triggers ct_help_min,
# i.e. the help text followed by ct_on_exit 1, instead of storing an empty
# value and reporting more consumed words than actually exist.
ct_help () {
  local _key _argc
  _shift=0
  while [ "$#" -ge 1 ]; do
    _argc=$#
    _key=$1
    case "$_key" in
      -h|--help)
        help_msg
        ct_on_exit 0
        shift 1
        ;;
      -M|--max_remapped)
        ct_help_min "$#" 2
        CT_MAX_REMAPPED=$2
        _shift=$((_shift + 2))
        shift 2
        ;;
      -d|--osd_down)
        CT_OSD_DOWN=1
        _shift=$((_shift + 1))
        shift 1
        ;;
      -k|--exit_on_critical)
        CT_EXIT_ON_ERROR=1
        _shift=$((_shift + 1))
        shift 1
        ;;
      -t|--max_temp)
        ct_help_min "$#" 2
        CT_MAX_TEMP=$2
        _shift=$((_shift + 2))
        shift 2
        ;;
      -w|--wait_for_nobackfill)
        CT_WAIT=1
        _shift=$((_shift + 1))
        shift 1
        ;;
      -S|--schedule)
        ct_help_min "$#" 2
        CT_SCHEDULE=$2
        _shift=$((_shift + 2))
        shift 2
        ;;
    esac
    # Nothing was shifted: the word is not ours, leave the rest to the caller.
    if [ "$#" -eq "$_argc" ]; then
      return "$_shift"
    fi
  done
  return "$_shift"
}

# Temporary samples file: ct_get_current_remapped appends one line per
# measurement (timestamp first) and _check_variation_remapped reads the lines
# back as two fields and prunes the old ones.
_CT_AVG_FILE=$(mktemp -t ct_avg_file.XXXXX)

# Tell whether the remapped-PG count has stopped changing.
# Samples older than 600 seconds are pruned from $_CT_AVG_FILE first; the
# remaining ones are then compared with each other.
# Globals:   _CT_AVG_FILE (read, rewritten) - "<epoch> <remapped>" per line
# Returns:   0 when at least 30 recent samples exist and all of them carry
#            the same remapped value (no variation), 1 otherwise - including
#            when there are too few samples to decide
_check_variation_remapped() {
  local _past _stale=0 _time _remap _first

  [ -r "$_CT_AVG_FILE" ] || return 1

  _past=$(( $(date "+%s") - 600 ))
  # Lines are appended chronologically, so the stale ones form a prefix.
  while read -r _time _remap; do
    [ "$_time" -lt "$_past" ] || break
    _stale=$((_stale + 1))
  done < "$_CT_AVG_FILE"

  # Prune only when something is stale: with GNU sed "1,0d" still deletes
  # line 1, which would throw a fresh sample away on every call.
  if [ "$_stale" -gt 0 ]; then
    sed -i "1,${_stale}d" "$_CT_AVG_FILE"
  fi

  if [ "$(wc -l < "$_CT_AVG_FILE")" -lt 30 ]; then
    return 1
  fi

  # Reference value: second field (the remapped count) of the oldest sample.
  _first=
  read -r _time _first < "$_CT_AVG_FILE"
  while read -r _time _remap; do
    # String comparison so that an unexpected non-numeric value cannot make
    # the test itself fail.
    if [ "$_remap" != "$_first" ]; then
      return 1
    fi
  done < "$_CT_AVG_FILE"
  return 0
}

# Query the current number of remapped PGs, record it and print it.
# Globals:   _CT_AVG_FILE (appended) - receives "<epoch> <remapped>"
# Outputs:   the remapped value on stdout
# NOTE(review): the nested .osdmap.osdmap path depends on the JSON layout
# produced by the installed Ceph release - confirm it matches.
ct_get_current_remapped() {
  local _date _remap
  _date=$(date "+%s")
  _remap=$(ceph -s -f json | jq '.["osdmap"]["osdmap"]["num_remapped_pgs"]')
  # Record the value just read, so the history holds timestamp and count.
  printf '%s %s\n' "$_date" "$_remap" >> "$_CT_AVG_FILE"
  printf '%s\n' "$_remap"
}

# Write the ids of the OSDs currently reported down into file $1, sorted
# numerically (first column of the "osd." lines of `ceph osd tree down`).
_ct_list_osd_down() {
  ceph osd tree down | grep "osd\." | awk '{print $1}' | sort -n -o "$1"
}

# Succeed when /tmp/OSDs_Down_now lists at least one OSD that is absent from
# the baseline /tmp/OSDs_Down. A missing baseline counts as an empty one.
_ct_new_osd_down() {
  local _base=/tmp/OSDs_Down
  [ -r "$_base" ] || _base=/dev/null
  # -v -x -F -f: keep the lines of "now" that equal no line of the baseline
  grep -q -v -x -F -f "$_base" /tmp/OSDs_Down_now
}

# Pause while an OSD that was up when the baseline was taken is down.
# Only additional down OSDs pause the script: an OSD that was down at start
# and has recovered since also changes the list, but is no reason to wait.
# Globals:   CT_OSD_DOWN (read) - the check only runs when it equals 1
# Files:     /tmp/OSDs_Down (baseline, read), /tmp/OSDs_Down_now (rewritten)
_ct_test_osd_down() {
  # Test if OSD has gone down since script beginning
  if [ "${CT_OSD_DOWN:-0}" -eq 1 ]; then
    _ct_list_osd_down /tmp/OSDs_Down_now
    if _ct_new_osd_down; then
      echo "OSDs down, operation paused"
      echo "For resume,"
      echo "You can reinit OSDs Down list by execute"
      echo "'ceph osd tree down |grep \"osd\\.\"| awk '{print \$1}'|sort -n -o /tmp/OSDs_Down'"
      ct_notify "Detected OSD Down"
      # Re-read the list every minute until no additional OSD is down
      while _ct_new_osd_down; do
        sleep 60
        _ct_list_osd_down /tmp/OSDs_Down_now
      done
      ct_logger "OSDs OK: Resume"
    fi
  fi
  return
}

# Print the content of sensor file $1 divided by 1000 (integer division).
# Returns 1, printing nothing, when the file cannot be read or does not hold
# an integer, so that callers never feed garbage to an arithmetic expansion.
_ct_read_temp() {
  local _raw=
  { read -r _raw < "$1"; } 2>/dev/null
  [[ $_raw =~ ^-?[0-9]+$ ]] || return 1
  printf '%s\n' "$((_raw / 1000))"
}

# Pause while a temperature sensor reads CT_MAX_TEMP or more.
# Every <dir>/*/temp1_input is examined; the first one found too high is
# polled every 60 seconds until it drops below the limit, after which the
# function returns without looking at the remaining sensors.
# Globals:   CT_MAX_TEMP (read)  - limit, compared with temp1_input / 1000
#            CT_HWMON_DIR (read) - optional sensor root, defaults to
#                                  /sys/class/hwmon
_ct_test_temp () {
  # Test current temperature
  local _sensor _temp
  for _sensor in "${CT_HWMON_DIR:-/sys/class/hwmon}"/*/temp1_input; do
    # Also skips the literal pattern left behind when nothing matches
    [ -f "$_sensor" ] || continue
    _temp=$(_ct_read_temp "$_sensor") || continue
    if [[ $_temp -ge $CT_MAX_TEMP ]]; then
      ct_logger "High temperature (${_temp} > ${CT_MAX_TEMP}): Pause"
      while [[ $_temp -ge $CT_MAX_TEMP ]]; do
        sleep 60
        # A sensor that stops answering must not block the script forever
        _temp=$(_ct_read_temp "$_sensor") || break
      done
      ct_logger "Temperature OK: Resume"
      return
    fi
  done
  return 0
}

# Succeed when hour $1 lies inside the window that starts at hour $2 and ends
# before hour $3. A window whose end is lower than its start wraps past
# midnight (21-07 covers 21h..23h and 00h..06h).
# `[` is used on purpose: it reads "08" and "09" as decimal numbers, whereas
# (( )) and [[ ]] would reject them as invalid octal.
_ct_in_schedule() {
  if [ "$3" -ge "$2" ]; then
    [ "$1" -ge "$2" ] && [ "$1" -lt "$3" ]
  else
    [ "$1" -ge "$2" ] || [ "$1" -lt "$3" ]
  fi
}

# Pause while the current hour is outside the window given by CT_SCHEDULE.
# Globals:   CT_SCHEDULE (read) - "<begin>-<end>" in hours; empty or unset
#                                 disables the check
# The clock is polled every 60 seconds, like the other pause loops, so the
# script resumes at the start of the window rather than up to an hour later.
_ct_test_schedule () {
  # Test if in the good time range
  local _begin _end _hour
  if [ -n "$CT_SCHEDULE" ]; then
    # First and second dash-separated fields
    _begin=${CT_SCHEDULE%%-*}
    _end=${CT_SCHEDULE#*-}
    _end=${_end%%-*}
    _hour=$(date "+%H")
    if ! _ct_in_schedule "$_hour" "$_begin" "$_end"; then
      ct_logger "Outside of hours range: Pause"
      while ! _ct_in_schedule "$_hour" "$_begin" "$_end"; do
        sleep 60
        _hour=$(date "+%H")
      done
      ct_logger "Inside of hours range: Resume"
    fi
  fi
  return
}

# Check whether the number of remapped PGs is low enough to go on.
# Arguments: $1 - optional. Empty: threshold CT_MAX_REMAPPED, progress bar
#                 scaled to twice that. 0: threshold 0 (wait until nothing is
#                 remapped), bar scaled to CT_MAX_REMAPPED. Anything else:
#                 threshold CT_MAX_REMAPPED, bar scaled to $1.
# Globals:   CT_MAX_REMAPPED (read)
# Returns:   0 when the current count is at or below the threshold;
#            1 when the caller has to check again (after a wait done here,
#            or after a progress line has been printed)
_ct_test_remapped(){
  # Wait for Remapped <= MAX_REMAPPED
  local _ct_mr=$CT_MAX_REMAPPED
  local _ct_very_high=$((CT_MAX_REMAPPED * 3))
  local _ct_max _ct_cbf _ct_cu
  if [ -z "$1" ]; then
    _ct_max=$((CT_MAX_REMAPPED * 2))
  elif [[ $1 -eq 0 ]]; then
    _ct_mr=0
    # With a threshold of 0 the progress value below ranges from 0 to
    # CT_MAX_REMAPPED, so that is the scale of the bar.
    _ct_max=$CT_MAX_REMAPPED
  else
    _ct_max=$1
  fi
  _ct_cbf=$(ct_get_current_remapped)
  if [[ $_ct_cbf -gt $_ct_very_high ]]; then
    ct_notify "Currently remapped PGs is very high"
    while [[ $_ct_cbf -gt $_ct_very_high ]]; do
      sleep 60
      _ct_cbf=$(ct_get_current_remapped)
    done
    return 1
  elif _check_variation_remapped ; then
    ct_notify "Remapped PG don't change for ~ 10 minutes"
    while _check_variation_remapped; do
      sleep 60
      _ct_cbf=$(ct_get_current_remapped)
    done
    return 1
  elif [[ $_ct_cbf -gt $_ct_mr ]]; then
    # Progress grows as the count falls back towards the threshold
    _ct_cu=$((CT_MAX_REMAPPED - (_ct_cbf - _ct_mr)))
    if [ "$_ct_cu" -lt 0 ]; then
      _ct_cu=0
    fi
    # \r rewrites the same terminal line on every poll
    printf '\r[%s] ==> Current remapped > Max remapped (%s>%s)      ' \
      "$(_ct_progress "$_ct_cu" "$_ct_max")" "$_ct_cbf" "$_ct_mr"
    return 1
  fi
  return 0
}

# Print a progress bar made of '#' (done) and '_' (remaining), no newline.
# Param 1: current value (default 0)
# Param 2: max value (default 64)
# One cell stands for max/64 units, and for at least 1 unit so that a max
# below 64 still yields a bar. The bar is max/step cells wide: it has no '#'
# at 0 and no '_' once the current value reaches the max.
_ct_progress() {
  local _cur=${1:-0} _max=${2:-64}
  local _step _total _filled _done _todo

  _step=$((_max / 64))
  if [ "$_step" -lt 1 ]; then
    _step=1
  fi
  _total=$((_max / _step))
  if [ "$_total" -lt 0 ]; then
    _total=0
  fi
  # Clamp so that out-of-range values cannot produce a negative width
  _filled=$((_cur / _step))
  if [ "$_filled" -lt 0 ]; then
    _filled=0
  elif [ "$_filled" -gt "$_total" ]; then
    _filled=$_total
  fi

  # Build runs of blanks of the wanted width, then swap in the bar characters
  printf -v _done '%*s' "$_filled" ''
  printf -v _todo '%*s' "$((_total - _filled))" ''
  printf '%s%s' "${_done// /#}" "${_todo// /_}"
}

# Report a critical event through ct_logger.
# Param 1: message
# With CT_EXIT_ON_ERROR non-empty the event is fatal: the cursor is switched
# back on, an empty line is printed and the script exits with status 1.
# Otherwise the event is only logged as a pause.
ct_notify() {
  if [ -z "$CT_EXIT_ON_ERROR" ]; then
    ct_logger "Critical ->  $1: Script paused"
    return
  fi

  ct_logger "Critical -> $1: Script stopped"
  setterm -cursor on
  echo
  exit 1
}

# Print a message and send it to syslog.
# Param 1: message
# The message goes to stdout exactly as given, then to `logger -i` prefixed
# with the basename of the running script.
ct_logger() {
  # printf with a quoted argument: no glob expansion, no collapsing of
  # repeated blanks, no surprise when the message starts with a dash
  printf '%s\n' "$1"
  logger -i "$(basename -- "$0"): $1"
}

# Block until the cluster is in a state where work may continue.
# The OSD-down, temperature and schedule checks run once, in that order;
# _ct_test_remapped is then polled every 10 seconds until it succeeds.
# A newline is printed at the end.
# Param 1: optional, handed over as is to _ct_test_remapped
ct_healthy_wait() {
  _ct_test_osd_down
  _ct_test_temp
  _ct_test_schedule
  until _ct_test_remapped $1; do
    sleep 10
  done
  echo
}
 

# When CT_WAIT is already set at this point, hold here until
# ct_healthy_wait 0 returns, i.e. until no PG is counted as remapped.
if [[ -n "${CT_WAIT}" ]]; then
  ct_healthy_wait 0
fi
