#!/usr/bin/env bash
#
# Autofan: GPU fan control loop.
#
# Every cycle the script reads GPU temperatures and fan speeds from
# $GPU_STATS_JSON, computes a new fan speed per GPU so that the core (or
# memory) temperature approaches the configured target, and applies it via
# nvidia-settings (NVIDIA) or the hwmon pwm1 files (AMD). It also stops the
# miner, reboots or shuts down when the critical temperature is exceeded.
#
# Usage: autofan start|stop|restart|log
#        autofan            attach to the running screen session or start one
# Internal sub-commands: loop, run, dontattach.
#
# Expected from the environment (not defined in this file): RIG_CONF,
# AUTOFAN_CONF, GPU_DETECT_JSON, GPU_STATS_JSON, the sourced `colors` file and
# the external commands `message`, `miner`, `sreboot`, `screen` and `jq`.
#
# NOTE: strict mode (set -euo pipefail) is intentionally not enabled; the
# script relies on unset variables expanding to empty strings and on
# `[[ ... ]] && cmd` lines returning non-zero.

AUTOFAN_VERSION="2.35"

if [[ -z $RIG_CONF && -r /etc/environment ]]; then
  # Re-read env variables: after an upgrade they can be empty.
  source /etc/environment
  # Export all variables named in the file. The expansion is unquoted on
  # purpose (one word per name); skipped when empty because a bare `export`
  # would dump the whole environment to stdout.
  env_names=$(grep -vE '^$|^#' /etc/environment | cut -d= -f1)
  # shellcheck disable=SC2086,SC2163
  [[ -n $env_names ]] && export $env_names
  unset env_names
fi

. colors

export DISPLAY=":0"

#AUTOFAN_LOG="/var/log/hive-autofan.log"

TIMEOUT_AMD=3     # per each value per each GPU
TIMEOUT_NVIDIA=60 # per all GPU at once

# Must be empty in release
DEBUG_COMMANDS=

SLEEP_TIME=15

# min difference between target and critical temp
MIN_TEMP_DIFF=10

# smart mode dependence (1° per this value of fan speed)
FAN_TEMP_SHIFT=10

#######################################################################
# settings (for autofan.conf without DEF_), default values
# target GPU temperature
DEF_TARGET_TEMP=
# minimal fan speed
DEF_MIN_FAN=30
# maximum fan speed
DEF_MAX_FAN=100
# temperature to stop miner
DEF_CRITICAL_TEMP=90
# action on reaching critical temp. "" to stop mining, reboot, shutdown
DEF_CRITICAL_TEMP_ACTION=
# AMD fan control (AMD control enable-0/AMD control disable-1)
DEF_NO_AMD=0
# Reboot rig if GPU error (enable-1/disable-0)
DEF_REBOOT_ON_ERROR=0
# mem temp
DEF_TARGET_MEM_TEMP=100
# smart mode (target temp depends on fan speed)
DEF_SMART_MODE=0
#######################################################################

# we will restart the miner only if it was stopped by this script
miner_stopped_by_overheat=0
# flag that the message was sent
unable_to_set_fan_speed=0
# flag if we met 511, 460, etc
temperature_is_unreal=0
# unparsable data
error_in_temp_readings=0
# 1 when at least one GPU used its one-cycle grace period in the last pass
critical_temp_exceeded=0
# per-GPU grace flags (same indexes as temperatures_array)
critical_temp_grace=()

screen -wipe autofan >/dev/null

###
# Log write: prints all arguments with `echo -e` (a leading -n is honoured)
# and, when AUTOFAN_LOG is set, appends a copy without ANSI escape sequences
# to that file.
echo2() {
  #echo -e "$1" > /dev/tty1
  if [[ -n "$AUTOFAN_LOG" ]]; then
    echo -e "$@" | sed $'s/\e\\[[0-9;:]*[a-zA-Z]//g' >> "$AUTOFAN_LOG"
  fi
  echo -e "$@"
}

###
# Prints one "<busid> Temp: ..C Fan: ..% Power: ..W" line per GPU, built from
# the global $gpu_stats JSON. jq errors are discarded.
get_payload() {
  printf '%s\n' "$gpu_stats" |
    jq --slurp -r '.[] | .busids as $b | .temp as $t | .fan as $f | .power as $p | reduce range(0; $b|length) as $i ([]; . + [[$b[$i], $t[$i], $f[$i], $p[$i]]] ) | .[] | .[0]+" Temp: "+.[1]+"C Fan: "+.[2]+"% Power: "+.[3]+"W"' 2>/dev/null
}

###
# Waits for $GPU_DETECT_JSON and fills the global per-GPU tables:
#   amd_indexes_array, nvidia_indexes_array, cpu_indexes_array,
#   amd_cards_number, nvidia_cards_number, fan_count_array,
#   card_bus_ids_array, card_names_array, GPU_MAX_LEN,
#   card_num, card_hwmon, card_fanmin, card_fanmax (AMD only).
init_gpu() {
  local i j
  local card_re='/card([0-9]+)/'

  # do nothing until $GPU_DETECT_JSON exists
  until [[ -f $GPU_DETECT_JSON ]]; do
    echo2 "${RED}No $GPU_DETECT_JSON file exists${NOCOLOR}"
    sleep 10
  done

  gpu_detect_json=$(cat "$GPU_DETECT_JSON")

  amd_indexes_query='[ . | to_entries[] | select(.value.brand == "amd") | .key ]'
  readarray -t amd_indexes_array < <(echo "$gpu_detect_json" | jq -r "$amd_indexes_query | .[]")
  amd_cards_number=${#amd_indexes_array[@]}

  nvidia_indexes_query='[ . | to_entries[] | select(.value.brand == "nvidia") | .key ]'
  readarray -t nvidia_indexes_array < <(echo "$gpu_detect_json" | jq -r "$nvidia_indexes_query | .[]")
  nvidia_cards_number=${#nvidia_indexes_array[@]}

  readarray -t fan_count_array < <(echo "$gpu_detect_json" | jq -r '. | to_entries[] | select(.value) | .value.fan_cnt')
  readarray -t card_bus_ids_array < <(echo "$gpu_detect_json" | jq -r '[ . | to_entries[] | .value.busid ] | .[]')
  readarray -t card_names_array < <(echo "$gpu_detect_json" | jq -r '[ . | to_entries[] | .value.name ] | .[]')

  cpu_indexes_query='[ . | to_entries[] | select(.value.brand == "cpu") | .key ]'
  readarray -t cpu_indexes_array < <(echo "$gpu_detect_json" | jq -r "$cpu_indexes_query | .[]")

  # limit name width
  GPU_MAX_LEN=0
  for (( i = 0; i < ${#card_names_array[@]}; i++ )); do
    [[ " ${cpu_indexes_array[*]} " == *" $i "* ]] && continue
    # remove some common strings
    card_names_array[i]="${card_names_array[i]/Radeon }"
    card_names_array[i]="${card_names_array[i]/GeForce }"
    [[ ${#card_names_array[i]} -gt $GPU_MAX_LEN ]] && GPU_MAX_LEN=${#card_names_array[i]}
  done
  [[ $GPU_MAX_LEN -gt 16 ]] && GPU_MAX_LEN=16

  # resolve the DRM card number and hwmon directory of every AMD GPU
  if [[ $amd_cards_number -gt 0 ]]; then
    card_num=()
    card_fanmin=()
    card_fanmax=()
    card_hwmon=()
    for j in "${amd_indexes_array[@]}"; do
      # the paths are left unquoted so that the globs expand
      [[ $(echo /sys/bus/pci/devices/0000:${card_bus_ids_array[j]}/drm/card*/) =~ $card_re ]] &&
        card_num[j]=${BASH_REMATCH[1]}
      card_hwmon[j]=$(realpath /sys/class/drm/card${card_num[j]}/device/hwmon/hwmon*/)
      [[ -z ${card_hwmon[j]} ]] &&
        echo2 "${RED}Unable to get HWMON dir for gpu ${card_bus_ids_array[j]}${NOCOLOR}" &&
        continue
      # `a && b || c` is kept on purpose: the fallback also applies when the
      # file exists but reading it fails.
      [[ -e ${card_hwmon[j]}/pwm1_max ]] &&
        card_fanmax[j]=$(head -1 "${card_hwmon[j]}/pwm1_max") || card_fanmax[j]=255
      [[ -e ${card_hwmon[j]}/pwm1_min ]] &&
        card_fanmin[j]=$(head -1 "${card_hwmon[j]}/pwm1_min") || card_fanmin[j]=0
    done
  fi

  echo2 "Detected ${WHITE}${amd_cards_number}${NOCOLOR} ${BRED}AMD${NOCOLOR} GPU, ${WHITE}${nvidia_cards_number}${NOCOLOR} ${BGREEN}NVIDIA${NOCOLOR} GPU"

  # check config
  if [[ ! -f $AUTOFAN_CONF ]]; then
    echo2 "${RED}No config $AUTOFAN_CONF${NOCOLOR}"
  fi
}

###
# Formats a temperature reading for the status line.
# Arguments:
#   $1 - NAME of the variable that receives the result (nameref)
#   $2 - target temperature
#   $3 - current temperature
#   $4 - previous temperature
#   $5 - optional text prefix
# The result is empty when the target or the current temperature is 0/empty.
# Otherwise it is "<prefix><cur>° <signed diff>", coloured by distance from
# the target.
get_temp_output() {
  local -n output=$1
  local target_temp=$2
  local cur_temp=$3
  local prev_temp=$4
  local text="$5"

  if [[ $target_temp -eq 0 || $cur_temp -eq 0 ]]; then
    output=""
    return
  fi

  printf -v output "%3s°" "$cur_temp"
  local temp_diff=$(( cur_temp - prev_temp ))
  printf -v temp_diff "%-4.4s" "${temp_diff#-}°"
  if [[ $cur_temp -lt $prev_temp ]]; then
    output="$output -$temp_diff"
  elif [[ $prev_temp -gt 1 && $cur_temp -gt $prev_temp ]]; then
    output="$output +$temp_diff"
  else
    # NOTE(review): the other branches append a sign plus a 4-column field;
    # confirm that this padding width matches the intended alignment.
    output="$output "
  fi

  if [[ $cur_temp -lt $(( target_temp - 5 )) ]]; then
    output="${BBLUE}${output}${NOCOLOR}"
  elif [[ $cur_temp -lt $(( target_temp - 1 )) ]]; then
    output="${CYAN}${output}${NOCOLOR}"
  elif [[ $cur_temp -le $(( target_temp + 1 )) ]]; then
    output="${GREEN}${output}${NOCOLOR}"
  elif [[ $cur_temp -le $(( target_temp + 5 )) ]]; then
    output="${YELLOW}${output}${NOCOLOR}"
  else
    output="${RED}${output}${NOCOLOR}"
  fi
  output="$text$output"
}

###
# Computes the fan speed for one GPU.
# Arguments:
#   $1 core temp        $2 previous core temp   $3 current fan speed (%)
#   $4 bus id           $5 GPU index            $6 brand ("Nvidia" or other)
#   $7 card name        $8 mem temp (optional)  $9 previous mem temp (optional)
# Globals read:    TARGET_TEMP, TARGET_MEM_TEMP, MIN_FAN, MAX_FAN, SMART_MODE,
#                  FAN_TEMP_SHIFT, GPU_MAX_LEN
# Globals written: target_fan_speed (the result), gpu_info[$5] (status line)
get_fan_speed() {
  local core_cur_temp="$1"
  local core_prev_temp="$2"
  local cur_fan_speed="$3"
  local gpu_bus_id="$4"
  local idx="$5"
  local brand="$6"
  local name="$7"
  local mem_cur_temp="$8"
  local mem_prev_temp="$9"
  local hyst=1
  local cur_temp prev_temp target_temp diff fast_fan
  # Filled by get_temp_output through namerefs. Declared local so that they do
  # not overwrite same-named variables of the caller.
  local core_temp mem_temp

  target_fan_speed=$cur_fan_speed # global

  local core_target_temp=$TARGET_TEMP
  local mem_target_temp=$TARGET_MEM_TEMP
  local max_fan=$MAX_FAN
  local min_fan=$MIN_FAN
  local msg=

  # change target temp according to fan speed
  if [[ $cur_fan_speed -ge 1 && $SMART_MODE == 1 && $FAN_TEMP_SHIFT -gt 3 ]]; then
    core_target_temp=$(( TARGET_TEMP - (50 - cur_fan_speed + FAN_TEMP_SHIFT/2)/FAN_TEMP_SHIFT ))
  fi

  # if speed is zero set it to 1
  [[ $cur_fan_speed -lt 1 ]] && cur_fan_speed=1

  # Regulate on mem temp when the core is at/below its target while the memory
  # is close to (or above) the memory target.
  if (( core_cur_temp <= (core_target_temp + 1) && mem_cur_temp > core_target_temp && mem_cur_temp >= (mem_target_temp - 2) )); then
    cur_temp=$mem_cur_temp
    prev_temp=0 #$mem_prev_temp
    target_temp=$mem_target_temp
    hyst=2
  else
    cur_temp=$core_cur_temp
    prev_temp=$core_prev_temp
    target_temp=$core_target_temp
  fi

  if [[ -z $cur_temp ]]; then
    # GPU disabled
    target_fan_speed=$max_fan
  elif [[ $cur_temp -ge $(( target_temp - hyst )) && $cur_temp -le $(( target_temp + hyst )) ]]; then
    # within the hysteresis window: do nothing
    target_fan_speed=$cur_fan_speed
  elif [[ $cur_temp -ge 1 ]]; then
    # change speed according to temp diff (rounded up when too hot)
    if [[ $cur_temp -gt $target_temp ]]; then
      target_fan_speed=$(( (cur_fan_speed*cur_temp + target_temp - 1)/target_temp + 1 ))
    else
      target_fan_speed=$(( cur_fan_speed*cur_temp/target_temp ))
    fi
  fi

  # use previous temp for special cases
  if [[ $prev_temp -ge 1 && $prev_temp -ne $cur_temp ]]; then
    diff=$(( cur_temp - prev_temp ))
    if [[ $cur_temp -gt $prev_temp ]]; then
      # do not spin down fan if temp going up
      [[ $target_fan_speed -lt $cur_fan_speed ]] && target_fan_speed=$cur_fan_speed
      # spin up more
      if [[ $(( cur_temp - 1 )) -gt $target_temp ]]; then
        target_fan_speed=$(( (cur_fan_speed*(cur_temp + diff) + target_temp - 1)/target_temp + 1 ))
        msg="+"
      fi
      if [[ $diff -gt 1 ]]; then
        # spin up fan in advance
        if [[ $(( cur_temp + diff - 1 )) -gt $target_temp ]]; then
          target_fan_speed=$(( (cur_fan_speed*(cur_temp + diff) + prev_temp - 1)/prev_temp + 1 ))
          msg="++"
        fi
        # spin up fan in advance even more
        if [[ $cur_temp -lt $target_temp && $(( cur_temp + diff*2 - 1 )) -gt $target_temp ]]; then
          target_fan_speed=$(( (cur_fan_speed*(cur_temp + diff*2) + prev_temp - 1)/prev_temp + 1 ))
          msg="+++"
        fi
        # spin up fan faster if temp is more than target+5
        fast_fan=$(( (max_fan + cur_fan_speed + 1)/2 ))
        if [[ $cur_temp -gt $(( target_temp + 5 )) && $fast_fan -gt $target_fan_speed ]]; then
          target_fan_speed=$fast_fan
          msg="++++"
        fi
      fi
    elif [[ $cur_temp -lt $prev_temp ]]; then
      # do not spin up fan if temp going down
      [[ $target_fan_speed -gt $cur_fan_speed ]] && target_fan_speed=$cur_fan_speed
    fi
  fi

  # limit speed down to 5% per cycle
  [[ $(( target_fan_speed + 5 )) -lt $cur_fan_speed ]] && target_fan_speed=$(( cur_fan_speed - 5 ))

  [[ $target_fan_speed -lt $min_fan ]] && target_fan_speed=$min_fan
  [[ $target_fan_speed -gt $max_fan ]] && target_fan_speed=$max_fan

  # Just to make sure
  if [[ $brand == "Nvidia" ]]; then
    [[ $target_fan_speed -gt 99 ]] && target_fan_speed=99
  else
    [[ $target_fan_speed -gt 100 ]] && target_fan_speed=100
  fi

  ### Output after all modifications of target_fan_speed
  get_temp_output "core_temp" "$core_target_temp" "$core_cur_temp" "$core_prev_temp"
  get_temp_output "mem_temp" "$mem_target_temp" "$mem_cur_temp" "$mem_prev_temp" "Mem "

  local echo_fan=
  local fan_diff=$(( target_fan_speed - cur_fan_speed ))
  if [[ $target_fan_speed -gt $cur_fan_speed ]]; then
    printf -v echo_fan "${YELLOW}%3d%% %-4s${NOCOLOR}" "$target_fan_speed" "+$fan_diff%"
  elif [[ $target_fan_speed -lt $cur_fan_speed ]]; then
    printf -v echo_fan "${CYAN}%3d%% %-4s${NOCOLOR}" "$target_fan_speed" "$fan_diff%"
  elif [[ $target_fan_speed -eq $min_fan ]]; then
    printf -v echo_fan "${BBLUE}%3d%% %-4s${NOCOLOR}" "$target_fan_speed" ""
  elif [[ $target_fan_speed -eq $max_fan ]]; then
    printf -v echo_fan "${RED}%3d%% %-4s${NOCOLOR}" "$target_fan_speed" ""
  else
    printf -v echo_fan "%3d%% %-4s" "$target_fan_speed" ""
  fi

  if [[ $brand == "Nvidia" ]]; then
    printf -v name "${BGREEN}%-${GPU_MAX_LEN}.${GPU_MAX_LEN}s${NOCOLOR}" "$name"
  else
    printf -v name "${BRED}%-${GPU_MAX_LEN}.${GPU_MAX_LEN}s${NOCOLOR}" "$name"
  fi

  # pad index
  local padded_idx="$idx"
  [[ $idx -lt 10 ]] && padded_idx=" $idx"

  gpu_info[idx]="${BBLUE}$padded_idx${NOCOLOR} ${gpu_bus_id} $name Target ${WHITE}$core_target_temp°${NOCOLOR} Core $core_temp Fan $echo_fan $mem_temp $msg"
}

###
# What we must do if temperature reached some limits.
# Globals read:    temperatures_array, CRITICAL_TEMP, TARGET_TEMP,
#                  MIN_TEMP_DIFF, CRITICAL_TEMP_ACTION, REBOOT_ON_ERROR
# Globals written: miner_stopped_by_overheat, temperature_is_unreal,
#                  critical_temp_exceeded, critical_temp_grace
# Calls the external `miner`, `message` and `sreboot` commands.
check_overheat() {
  local t i msg

  # if `miner status` succeeds, forget that we stopped the miner
  [[ $miner_stopped_by_overheat == 1 ]] && miner status > /dev/null && miner_stopped_by_overheat=0

  if [[ $miner_stopped_by_overheat == 1 ]]; then
    # resume only when every GPU is well below the critical temperature
    local allisok=1
    for t in "${temperatures_array[@]}"; do
      (( t > CRITICAL_TEMP - MIN_TEMP_DIFF + 1 )) && allisok=0 && break
    done
    if [[ $allisok == 1 ]]; then
      miner_stopped_by_overheat=0 # let's forget about this
      miner start
      msg="GPU cooled down, mining resumed"
      message ok "$msg"
    fi
  elif [[ $miner_stopped_by_overheat == 0 ]]; then
    # The grace period is tracked per GPU. A single shared flag was reset by
    # any cooler GPU later in the same pass, so an overheated GPU that was not
    # the last one in the list never left the "waiting" state.
    local -a grace_now=()
    local any_grace=0
    for i in "${!temperatures_array[@]}"; do
      t=${temperatures_array[i]}
      grace_now[i]=0
      if [[ $t -gt 120 ]]; then
        # unreal value (511, 460, etc): driver error
        if [[ $REBOOT_ON_ERROR == 1 ]]; then
          msg="Autofan: GPU temperature $t is unreal, driver error, rebooting"
          get_payload | message error "$msg" payload
          nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
        elif [[ $temperature_is_unreal == 0 ]]; then
          msg="Autofan: GPU temperature $t is unreal, driver error"
          get_payload | message warning "$msg" payload
          temperature_is_unreal=1
        fi
        break
      elif [[ $t -gt $CRITICAL_TEMP && $t -lt $(( TARGET_TEMP + 20 )) && ${critical_temp_grace[i]:-0} == 0 ]]; then
        # prevent critical action for 1 cycle if temp is within 20° from TARGET_TEMP
        echo2 "${YELLOW}Warning: critical temp exceeded $t°C > ${CRITICAL_TEMP}°C, waiting${NOCOLOR}"
        grace_now[i]=1
        any_grace=1
      elif [[ $t -gt $CRITICAL_TEMP ]]; then
        # stop on CRITICAL_TEMP
        grace_now=()
        any_grace=0
        miner_stopped_by_overheat=1
        miner stop
        msg="GPU exceeded ${CRITICAL_TEMP}°C"
        case $CRITICAL_TEMP_ACTION in
          reboot) msg+=", rebooting" ;;
          shutdown) msg+=", shutting down" ;;
          *) msg+=", mining stopped" ;;
        esac
        get_payload | message error "$msg" payload
        if [[ $CRITICAL_TEMP_ACTION == "reboot" ]]; then
          nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
        elif [[ $CRITICAL_TEMP_ACTION == "shutdown" ]]; then
          nohup bash -c 'sreboot shutdown' > /tmp/nohup.log 2>&1 &
        fi
        break
      fi
    done
    # grace_now is dense from index 0, so this copy keeps the indexes
    critical_temp_grace=("${grace_now[@]}")
    critical_temp_exceeded=$any_grace
  fi
}

###
# Reports an error when $1 is not a natural number (digits only, non-empty):
# reboots if REBOOT_ON_ERROR is 1, otherwise sends a warning once.
check_gpu_params() {
  local param=$1
  local msg
  # the expansion is empty when param is empty or contains a non-digit
  if [[ -z "${param##*[!0-9]*}" ]]; then
    if [[ $REBOOT_ON_ERROR == 1 ]]; then
      msg="Autofan: error in temp readings, rebooting"
      get_payload | message error "$msg" payload
      nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
    elif [[ $error_in_temp_readings == 0 ]]; then
      msg="Autofan: error in temp readings"
      get_payload | message warning "$msg" payload
      error_in_temp_readings=1
    fi
  fi
}

###
# Computes the fan speed of every NVIDIA GPU and applies all changes with a
# single nvidia-settings call.
# Globals written: args (array of nvidia-settings arguments), fans_array,
#                  unable_to_set_fan_speed, plus those of get_fan_speed
# TODO merge with amd_auto_fan_control
nvidia_auto_fan_control() {
  args=()
  local index i nvs debug
  local gpu_temperature gpu_temperature_previous gpu_fan_speed gpu_fan_speed_previous
  local card_bus_id name fan_count
  local brand="Nvidia"
  local fan_idx=0 # number of the first fan of the current GPU
  local gpu_idx=0

  for index in "${nvidia_indexes_array[@]}"; do
    gpu_temperature=${temperatures_array[index]}
    gpu_temperature_previous=${temperatures_array_previous[index]:-0}
    gpu_fan_speed=${fans_array[index]}
    gpu_fan_speed_previous=${fans_array_previous[index]:-0}
    card_bus_id=${card_bus_ids_array[index]}
    name="${card_names_array[index]}"
    fan_count=${fan_count_array[index]}
    [[ -z $fan_count || $fan_count == "null" ]] && fan_count=1

    check_gpu_params "$gpu_temperature"
    # unparsable fan reading: fall back to the previous value
    [[ -z "${gpu_fan_speed##*[!0-9]*}" ]] && gpu_fan_speed=$gpu_fan_speed_previous
    [[ $gpu_fan_speed_previous -eq 0 ]] && gpu_fan_speed_previous=$gpu_fan_speed

    # skip if no temp
    if [[ -n "${gpu_temperature##*[!0-9]*}" ]]; then
      get_fan_speed "$gpu_temperature" "$gpu_temperature_previous" "$gpu_fan_speed_previous" "$card_bus_id" "$index" "$brand" "$name"
      # Do not set fan speed if not changed. The whole group is guarded;
      # otherwise the per-fan arguments are always added and nvidia-settings
      # runs on every cycle.
      if [[ $target_fan_speed -ne $gpu_fan_speed_previous ||
        $gpu_fan_speed -lt $(( target_fan_speed - 1 )) ||
        $gpu_fan_speed -gt $(( target_fan_speed + 1 )) ]]; then
        args+=(-a "[gpu:$gpu_idx]/GPUFanControlState=1")
        for (( i = fan_idx; i < fan_idx + fan_count; i++ )); do
          args+=(-a "[fan:$i]/GPUTargetFanSpeed=$target_fan_speed")
        done
      fi
      fans_array[index]=$target_fan_speed
    else
      fans_array[index]=$gpu_fan_speed
    fi
    gpu_idx=$(( gpu_idx + 1 ))
    fan_idx=$(( fan_idx + fan_count ))
  done

  if (( ${#args[@]} > 0 )); then
    [[ $DEBUG_COMMANDS == 1 ]] && echo "nvidia-settings ${args[*]}"
    if ! nvs=$(timeout --foreground -s9 "$TIMEOUT_NVIDIA" nvidia-settings "${args[@]}" 2>&1); then
      debug=$(dmesg -Tk | tail -n 300 | grep "Xid")
      if [[ $REBOOT_ON_ERROR == 1 ]]; then
        echo -e "$nvs\n\n$debug" | message error "Autofan: unable to set fan speed, rebooting" payload
        nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
      elif [[ $unable_to_set_fan_speed == 0 ]]; then
        echo -e "$nvs\n\n$debug" | message warning "Autofan: unable to set fan speed" payload
      fi
      unable_to_set_fan_speed=1
    fi
  fi
}

###
# Prints the current fan speed (0..100) of AMD GPU $1, scaled from the raw
# pwm1 value by card_fanmin/card_fanmax. Returns 1 when no hwmon dir is known.
amd_get_fan_speed() {
  local index="$1"
  local speed=0
  local fan
  [[ -z "${card_hwmon[index]}" ]] && return 1
  fan=$(timeout --foreground -s9 "$TIMEOUT_AMD" head -1 "${card_hwmon[index]}/pwm1" 2>/dev/null)
  if [[ $fan -gt ${card_fanmin[index]} && ${card_fanmax[index]} -gt ${card_fanmin[index]} ]]; then
    speed=$(( (fan - card_fanmin[index])*100/(card_fanmax[index] - card_fanmin[index]) ))
  fi
  echo "$speed"
}

###
# Sets the fan of AMD GPU $1 to $2 percent: writes 1 to pwm1_enable, then the
# scaled raw value to pwm1. Both writes run under a timeout.
# Returns 1 when no hwmon dir is known, otherwise the status of `timeout`.
amd_set_fan_speed() {
  local index="$1"
  [[ -z "${card_hwmon[index]}" ]] && return 1
  local fan="$2"
  local speed=$(( fan*(card_fanmax[index] - card_fanmin[index])/100 + card_fanmin[index] ))
  # the directory and value are passed as arguments instead of being pasted
  # into the command string
  timeout --foreground -s9 "$TIMEOUT_AMD" bash -c \
    'echo 1 > "$1/pwm1_enable" 2>/dev/null && echo "$2" > "$1/pwm1" 2>/dev/null' \
    _ "${card_hwmon[index]}" "$speed"
}

###
# Computes and applies the fan speed of every AMD GPU.
# Globals written: fans_array, plus those of get_fan_speed
amd_auto_fan_control() {
  local index
  local gpu_temperature gpu_temperature_previous gpu_fan_speed gpu_fan_speed_previous
  local card_bus_id name mem_temp mem_temp_previous
  local brand="AMD"

  for index in "${amd_indexes_array[@]}"; do
    gpu_temperature=${temperatures_array[index]}
    gpu_temperature_previous=${temperatures_array_previous[index]:-0}
    gpu_fan_speed=${fans_array[index]}
    gpu_fan_speed_previous=${fans_array_previous[index]:-0}
    card_bus_id=${card_bus_ids_array[index]}
    name="${card_names_array[index]}"
    mem_temp=${memtemp_array[index]}
    mem_temp_previous=${memtemp_array_previous[index]}

    check_gpu_params "$gpu_temperature"
    [[ -n "$mem_temp" ]] && check_gpu_params "$mem_temp"
    # unparsable fan reading: fall back to the previous value
    [[ -z "${gpu_fan_speed##*[!0-9]*}" ]] && gpu_fan_speed=$gpu_fan_speed_previous
    [[ $gpu_fan_speed_previous -eq 0 ]] && gpu_fan_speed_previous=$gpu_fan_speed

    # skip if no temp
    if [[ -n "${gpu_temperature##*[!0-9]*}" ]]; then
      get_fan_speed "$gpu_temperature" "$gpu_temperature_previous" "$gpu_fan_speed_previous" "$card_bus_id" "$index" "$brand" "$name" "$mem_temp" "$mem_temp_previous"
      # do not set fan speed if not changed
      if [[ $target_fan_speed -ne $gpu_fan_speed_previous ||
        $gpu_fan_speed -lt $(( target_fan_speed - 1 )) ||
        $gpu_fan_speed -gt $(( target_fan_speed + 1 )) ]]; then
        [[ $DEBUG_COMMANDS == 1 ]] && echo "amd_set_fan_speed $index $target_fan_speed"
        amd_set_fan_speed "$index" "$target_fan_speed"
      fi
      fans_array[index]=$target_fan_speed
    else
      fans_array[index]=$gpu_fan_speed
    fi
  done
}

###
# Main loop (never returns). Each cycle: reload the config, wait for fresh GPU
# stats, check for overheat, then run the per-brand fan control and print the
# status lines.
auto_fan_control() {
  local last_stats=0
  local VARIABLES=("ENABLED" "TARGET_TEMP" "CRITICAL_TEMP" "CRITICAL_TEMP_ACTION" "MIN_FAN" "MAX_FAN" "NO_AMD" "REBOOT_ON_ERROR" "TARGET_MEM_TEMP" "SMART_MODE")
  local var ref i now new_stats msg changed

  while true; do
    # reload settings from scratch so that removed options fall back to defaults
    for var in "${VARIABLES[@]}"; do
      unset "$var"
    done
    [[ -f $AUTOFAN_CONF ]] && source "$AUTOFAN_CONF"

    # set default values
    for var in "${VARIABLES[@]}"; do
      [[ -n "${!var}" ]] && continue
      ref="DEF_$var"
      # '%s' keeps the value from being interpreted as a format string
      printf -v "$var" '%s' "${!ref}"
    done

    # keep the critical temp at least MIN_TEMP_DIFF above the target
    [[ $(( CRITICAL_TEMP - MIN_TEMP_DIFF )) -lt $TARGET_TEMP ]] && CRITICAL_TEMP=$(( TARGET_TEMP + MIN_TEMP_DIFF ))

    # wait (up to 30 polls) for a gpu-stats file written after this cycle began
    now=$(date +%s)
    for (( i = 0; i < 30; i++ )); do
      if new_stats=$(stat --printf %Y "$GPU_STATS_JSON" 2>/dev/null) &&
        [[ $new_stats -ne $last_stats && $new_stats -ge $now ]]; then
        last_stats=$new_stats
        gpu_stats="$(cat "$GPU_STATS_JSON" 2>/dev/null)"
        readarray -t temperatures_array < <(echo "$gpu_stats" | jq -r ".temp | .[]" 2>/dev/null)
        [[ -n ${temperatures_array[0]} ]] && break
        echo2 "${RED}No temp from gpu-stats${NOCOLOR}"
      else
        echo -n "."
        sleep 1
      fi
    done
    echo

    echo2 "${CYAN}$(date +"%Y-%m-%d %T") ${NOCOLOR}"

    # reboot if temperatures_array is empty
    if [[ -z ${temperatures_array[0]} || ${temperatures_array[0]} == 'null' ]]; then
      if [[ $REBOOT_ON_ERROR == 1 ]]; then
        msg="GPU driver error, no temps, rebooting"
        get_payload | message err "$msg" payload
        nohup bash -c 'sreboot' > /tmp/nohup.log 2>&1 &
      elif [[ $error_in_temp_readings == 0 ]]; then
        msg="GPU driver error, no temps"
        get_payload | message warning "$msg" payload
      fi
      error_in_temp_readings=1
      read -r -t "$SLEEP_TIME"
      continue
    fi

    # memory temps are used only when there is one value per GPU
    readarray -t memtemp_array < <(echo "$gpu_stats" | jq -r "if .mtemp then .mtemp | .[] else empty end" 2>/dev/null)
    [[ ${#memtemp_array[@]} -ne ${#temperatures_array[@]} ]] && memtemp_array=()

    # miner stop will work
    check_overheat

    # run only when enabled, a target is set and both files exist
    if [[ $ENABLED == 1 && -n $TARGET_TEMP && -f $AUTOFAN_CONF && -f $GPU_STATS_JSON ]]; then
      # print the settings banner only when something changed
      changed=0
      for var in "${VARIABLES[@]}"; do
        ref="PREV_$var"
        [[ "${!ref}" == "${!var}" ]] && continue
        printf -v "$ref" '%s' "${!var}"
        changed=$(( changed + 1 ))
      done
      if [[ $changed -gt 0 ]]; then
        echo2 -n "${PURPLE} Target ${WHITE}${TARGET_TEMP}°C "
        [[ ${#memtemp_array[@]} -gt 0 ]] && echo2 -n "${PURPLE} Max Mem ${WHITE}${TARGET_MEM_TEMP}°C "
        echo2 -n "${PURPLE} Miner ${WHITE}${CRITICAL_TEMP_ACTION:-stop} ${PURPLE}at ${WHITE}${CRITICAL_TEMP}°C "
        echo2 -n "${PURPLE} Fan ${WHITE}${MIN_FAN}%${PURPLE}..${WHITE}${MAX_FAN}% "
        [[ $SMART_MODE == 1 ]] && echo2 -n "${PURPLE} Smart Mode ${WHITE}1°/${FAN_TEMP_SHIFT}% "
        [[ $NO_AMD == 1 && $amd_cards_number -gt 0 ]] && echo2 -n "${PURPLE} No ${BRED}AMD ${WHITE}enabled "
        [[ $REBOOT_ON_ERROR == 1 ]] && echo2 -n "${PURPLE} Reboot on error ${WHITE}enabled "
        echo2 "${NOCOLOR}"
      fi

      readarray -t fans_array < <(echo "$gpu_stats" | jq -r ".fan | .[]")
      [[ $DEBUG_COMMANDS == 1 ]] && echo "Fans: ${fans_array[*]}"

      gpu_info=()

      if [[ $nvidia_cards_number -gt 0 ]]; then
        nvidia_auto_fan_control
      fi
      if [[ $amd_cards_number -gt 0 && $NO_AMD != 1 ]]; then # AMD control is not disabled
        amd_auto_fan_control
      fi

      echo2 "$(printf "%s\n" "${gpu_info[@]}")"

      temperatures_array_previous=("${temperatures_array[@]}")
      memtemp_array_previous=("${memtemp_array[@]}")
      fans_array_previous=("${fans_array[@]}")
    elif [[ $ENABLED != 1 ]]; then
      echo2 "${PURPLE} Autofan ${WHITE}DISABLED ${NOCOLOR}"
      PREV_ENABLED=
    fi

    # interruptible sleep
    read -r -t "$SLEEP_TIME"
  done
}

###
# Starts the detached "autofan" screen session unless one already exists.
start() {
  session_count=$(screen -ls autofan | grep -c ".autofan")
  if [[ $session_count -gt 0 ]]; then
    echo -e "${RED}Autofan screen is already running${NOCOLOR}"
    echo -e "Run ${CYAN}autofan${NOCOLOR} or ${CYAN}screen -r autofan${NOCOLOR} to resume screen"
    return
  fi
  screen -dm -S autofan "$0" loop
  echo2 "Autofan v$AUTOFAN_VERSION started"
}

###
# Quits every "autofan" screen session.
stop() {
  local pid
  local -a screens
  mapfile -t screens < <(screen -ls autofan | grep -Po "\K[0-9]+(?=\.autofan)" | sort --unique)
  if [[ ${#screens[@]} -eq 0 ]]; then
    echo "No autofan screens found"
  else
    for pid in "${screens[@]}"; do
      echo "Stopping autofan screen session $pid"
      screen -S "$pid.autofan" -X quit
    done
  fi
}

###
# Prints the command line help.
usage() {
  local bname
  bname=$(basename "$0")
  echo -e "Usage: ${CYAN}$bname start|stop|restart|log${NOCOLOR}"
  echo -e "If you run ${CYAN}$bname${NOCOLOR} without parameters $bname screen will be tried to resume."
}

###
# Prints a hardcopy of the autofan screen window.
get_log() {
  local log=/tmp/autofan.log
  [[ -f $log ]] && rm "$log"
  screen -S autofan -X hardcopy "$log" &&
    cat -s "$log" | sed "s/\xB0/°/g; s/\x91/↑/g; s/\x93/↓/g" # needed to convert some symbols
}

case $1 in
  log)
    get_log
    ;;
  loop)
    # supervisor: restart the worker whenever it exits
    while true; do
      "$0" run
      sleep 1
      echo "Restarting..."
    done
    ;;
  run)
    echo2 "Autofan v$AUTOFAN_VERSION started"
    init_gpu
    auto_fan_control
    ;;
  start | dontattach)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    stop && sleep 1
    start
    ;;
  *)
    # for compatibility: attach to the running session or start a new one
    session_count=$(screen -ls autofan | grep -c ".autofan")
    if [[ $session_count -gt 0 ]]; then
      [[ -t 1 ]] && screen -x -S autofan || echo "Autofan is already running"
    else
      # start new screen
      start
    fi
    ;;
esac