# -*- coding:utf-8 -*-
# This is a Python3 script.
# author: wpf999[in]equn.com
# release date: 20160216
# release date: 20170920
# release date: 20200405 #fix 5 bugs
# release date: 20200406 #detect ImportError
# release date: 20200407 #fix bug in function get_gpu_list
# release date: 20200408 #fix bug for os name map
# release date: 20200410 #refine
# release date: 20200411 #delete unused function
# release date: 20200412 #delete unused function
# release date: 20200412 #improve string matching for fah log
# release date: 20200413 #improve & refine & fix bug
# release date: 20200414 fix bug
# release date: 20200415 refine
# release date: 20200504 refine
# release date: 20211204 fix bug for new GPU driver
__version__='20211204'
import sys
if sys.version_info.major != 3 :
print( 'python3 is needed. \npress enter to exit...' )
sys.stdin.readline()
exit(-1)
try:
import os
import platform
import time
import xml.dom.minidom
import logging
import urllib.parse
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import http.client
except:
t,v,_ = sys.exc_info()
print(t,v)
print( 'press enter to exit...' )
print('$PATH:',os.getenv('PATH'))
sys.stdin.readline()
exit(-1)
def get_os_info():
uname = platform.uname()
os_name = uname.system + uname.release
if uname.system.upper() == 'Windows'.upper():
os_name = uname.system + uname.release
else:
os_name = uname.system
return {
'name':os_name ,
'arch':uname.machine
}
#end def
def read_log(fah_logfile: str):
contents = []
FS_index = {}
cfg_begin = cfg_end = 0
f = open(fah_logfile, mode='rb')
bytes_list = f.readlines()
f.close()
for i,b in enumerate(bytes_list):
line=b.decode('UTF-8', errors='ignore').strip()
contents.append( line )
item=line.split(':')
# get last WU index for echo slot
if (len(item) == 6) and (item[-1] == 'Starting') and ( item[4].startswith('FS') ) and ( item[3].startswith('WU') ):
FSxx = item[4]
FS_index[FSxx] = i #it is great, only records the last starting index
if (len(item) == 4) and (item[-1] == ''):
cfg_begin = i
if (len(item) == 4) and (item[-1] == ''):
cfg_end = i
cfg_index = (cfg_begin, cfg_end)
return contents, FS_index, cfg_index
#end def
def get_config(log_lines, cfg_index):
i_begin, i_end = cfg_index
if i_begin >= i_end :
raise Exception('can not find ')
config_xml = ''
for i in range( i_begin, i_end+1 ):
config_xml += log_lines[i].lstrip('1234567890:')
#config line maybe contain ':', so split(':') method causes a bug
user,team,num_slots = parse_config_xml(config_xml)
return {
'user':user,
'team':team,
'num_slots':num_slots
}
#end def
def parse_config_xml(config_xml: str):
DOMTree = xml.dom.minidom.parseString(config_xml)
root = DOMTree.documentElement
user = root.getElementsByTagName('user')[0].getAttribute('v').strip()
team = root.getElementsByTagName('team')[0].getAttribute('v').strip()
n_slots = len(root.getElementsByTagName('slot'))
return user,team,n_slots
#end def
def get_WUxxFSxx(line: str):
#return line[9:18]
item = line.split(':')
WUxx = item[3]
FSxx = item[4]
return WUxx,FSxx
#end def
def get_WU_info(log_lines, index: int):
WUxx, FSxx = get_WUxxFSxx(log_lines[index])
#slot = FSxx.strip('FS')
found = 0
time_step_array = []
c = len(log_lines)
for i in range(index, c-1 ):
line = log_lines[i]
item = line.split(':')
if len(item)<5 :
#drop some line
#print('continue:',line)
continue
if (item[3]!=WUxx) or (item[4]!=FSxx) :
#drop some line
#print('continue:',line)
continue
# get_WU_core_PID
# The line likes '06:32:23:WU02:FS04:Core PID:3920'
if (len(item)==7) and ( item[-2] == 'Core PID') :
core_PID = int(item[-1])
found += 1
continue
# get_WU_core_and_project
# The line likes '06:32:23:WU02:FS04:0x22:Project: 14543 (Run 0, Clone 1319, Gen 22)'
if (len(item)==8) and (item[-2] == 'Project') :
core = item[-3]
project = item[-1].strip()
project_num = project.split()[0]
found += 1
continue
# get_WU_time_and_steps
# The line likes '05:54:27:WU04:FS03:0x22:Completed 4350000 out of 5000000 steps (87%)'
# item[-1] likes 'Completed 6000000 out of 8000000 steps (75%)'
if (len(item)==7) and ('out of' in item[-1]) and ('steps' in item[-1]) :
hour = int(item[0])
min = int(item[1])
sec = int(item[2])
t = 3600*hour + 60*min + sec
step = item[-1].split()[-1].strip('(%)')
step = int(step)
time_step_array.append((t, step))
#end for
if found == 2 :
return {
'slot':FSxx, #slot
'core_PID':core_PID,
'core':core,
'project_num':project_num,
'project':project,
'time_step_array':time_step_array
}
else:
raise Exception('WU info not found') #some exception
#end def
def compute_TPF(time_step_array: list):
if len(time_step_array) < 2: return 0,0,0,0,0,0
t0, step0 = time_step_array[0]
tx, stepx = time_step_array[-1]
#异常
if (tx-t0) < 0 : # next day
tx = tx + 24*3600
tpf = 1.0*(tx-t0)/(stepx-step0)
tpf = int( round(tpf) )
tpf_min = tpf//60
tpf_sec = tpf%60
return step0,stepx,t0,tx,tpf_min,tpf_sec
#end def
def get_nv_smi()-> str:
util_paths=[
r'/usr/bin/nvidia-smi' ,
str(os.getenv('SYSTEMDRIVE')) + r'\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe' ,
str(os.getenv('SYSTEMDRIVE')) + r'\Windows\System32\nvidia-smi.exe'
]
util_cmd = ''
for p in util_paths:
if os.path.exists(p) and os.path.isfile(p):
util_cmd = p
break
if util_cmd != '' :
#because of space characters in the path, we need to plus ""
if '\x20' in util_cmd:
util_cmd = '"' + util_cmd + '"'
return util_cmd
else:
raise Exception( 'can not find nvidia-smi! exit...' )
#end def
def get_nv_gpu_info():
util_cmd = get_nv_smi()
gpu_list=[]
utils_output_xml=os.popen( util_cmd + ' -q -x').read()
if 'NVIDIA-SMI has failed' in utils_output_xml:
print('NVIDIA-SMI has failed')
return []
#print utils_output_xml,len(utils_output_xml) #debug
DOMTree = xml.dom.minidom.parseString(utils_output_xml)
root = DOMTree.documentElement
driver_version = root.getElementsByTagName('driver_version')
#print len(driver_version) ,driver_version[0].nodeType, driver_version[0].childNodes[0].data #.tagName
driver_version_str = driver_version[0].childNodes[0].data
#print driver_version_str #debug
gpus = root.getElementsByTagName('gpu')
for gpu in gpus :
#gpu.hasAttribute('id')
product_name=gpu.getElementsByTagName('product_name')[0].childNodes[0].data
#print product_name
uuid = gpu.getElementsByTagName('uuid')[0].childNodes[0].data
#print( uuid )
graphics_clock = gpu.getElementsByTagName('clocks')[0].getElementsByTagName('graphics_clock')[0].childNodes[0].data
mem_clock = gpu.getElementsByTagName('clocks')[0].getElementsByTagName('mem_clock')[0].childNodes[0].data
pci_bus = gpu.getElementsByTagName('pci')[0].getElementsByTagName('pci_bus')[0].childNodes[0].data
#print('%15s'%'pci_bus:',pci_bus)
#print graphics_clock, mem_clock
pci_gpu_link_info_item = gpu.getElementsByTagName('pci')[0].getElementsByTagName('pci_gpu_link_info')[0]
pci_gen = pci_gpu_link_info_item.getElementsByTagName('pcie_gen')[0].getElementsByTagName('current_link_gen')[0].childNodes[0].data
pci_speed = pci_gpu_link_info_item.getElementsByTagName('link_widths')[0].getElementsByTagName('current_link_width')[0].childNodes[0].data
#print pci_gen, pci_speed
map_pid_pname={}
process_info_list = gpu.getElementsByTagName('processes')[0].getElementsByTagName('process_info')
#print 'len(process_info_list)', len(process_info_list)#[0].childNodes[0].data
for process_info in process_info_list:
pid = process_info.getElementsByTagName('pid')[0].childNodes[0].data
pid = int(pid)
process_name = process_info.getElementsByTagName('process_name')[0].childNodes[0].data
map_pid_pname[pid]= process_name
#end for
#n/a处理
if ( graphics_clock=='N/A' ) or ( mem_clock=='N/A' ):
#graphics_clock,mem_clock = nvapi_detect_clock(pci_bus)
print('can not detect GPU clock')
#raise Exception('can not detect GPU clock')
#end if
product_name = product_name.strip('NVIDIA').strip()
ginfo = { 'driver':driver_version_str,
'uuid':uuid,
'pci_bus':pci_bus,
'name':product_name,
'graphics_clock':graphics_clock,
'mem_clock':mem_clock,
'pci_gen':pci_gen,
'pci_speed':pci_speed,
'pid_list':map_pid_pname
}
gpu_list.append(ginfo)
#end for
return gpu_list
#end def
def get_gpu_info():
#'only for nvidia GPU now!'
return get_nv_gpu_info()
#end def
def get_manho_html()-> str:
username = 'wpf'
team = '3213'
#post_body={}
#post_body['username']=username
#post_body['team']=team
#params = urllib.parse.urlencode(post_body)
#cookie=""
#header={"Content-type": "application/x-www-form-urlencoded", "Cookie": cookie }
#html="" #html needs a initial value
# conn = http.client.HTTPSConnection('fah.manho.org')
# conn.request("POST", "/gpu_statistics.php?a=add", params , header)
# resp = conn.getresponse()
# if resp.status!=200 :
# print('===========HTTP response code is not 200 !===========')
# return ""
# resp_data = resp.read()
# html= resp_data.decode( 'utf-8' )
# conn.close()
#print(html)
cookie='username='+username+'; team='+team
header={"Content-type": "application/x-www-form-urlencoded", "Cookie": cookie }
post_body={}
params = urllib.parse.urlencode(post_body)
html=''
try:
conn = http.client.HTTPSConnection('fah.manho.org', timeout=10)
conn.request('POST', '/gpu_statistics.php?a=add', params, header)
resp = conn.getresponse()
if resp.status != 200:
print('===========HTTP response code is not 200 !===========')
return ''
resp_data = resp.read()
html = resp_data.decode('utf-8')
#print(html) #debug
conn.close()
except:
t, v, _ = sys.exc_info()
print(t, v)
print(time.asctime(time.localtime()),': Network exception: can not visit fah.manho.org')
return ''
return html
#end def
def get_manho_gpu_table(html: str):
html_select_gpu = html.split('')[0]
op_list = html_select_gpu.split('')
#print len(op_list)
op_list.pop() #remove the last item, it is a ""
#print len(op_list)
#print op_list
map_gpuname_gpuid = {}
for hh in op_list:
tmp = hh.split('>')
gpuname = tmp[1].strip().replace('\x20','').upper()
gpuid = tmp[0].split('=')[1].strip().strip('"')
map_gpuname_gpuid[gpuname] = gpuid
#print( map_gpuname_id ) #debug
return map_gpuname_gpuid
#end def
def get_manho_os_table(html: str):
#html may be checked in the future
os_table = {'Windows XP' : '1',
'Windows Vista' : '2',
'Windows 2008 Server' : '2',
'Windows 7' : '3',
'Windows 2008 Server R2' : '3',
'Windows 8' : '4',
'Windows 2012 Server' : '4',
'Windows 8.1' : '5',
'Windows 10' : '6',
'Linux' : '7'}
t = {}
for item in os_table.keys():
x = item.replace('\x20','')
t[x] = os_table[item]
return t
#end def
def get_manho_table():
html = get_manho_html()
if html == '' :
return None
gpu_table = get_manho_gpu_table(html)
os_table = get_manho_os_table(html)
return {
'gpu_table':gpu_table,
'os_table':os_table
}
#end def
def fill_form( user: str, team: str, WU_info, gpu_info, os_info, manho_table ):
driver = gpu_info['driver']
graphics_clock = gpu_info['graphics_clock'].strip('MHz').strip()
mem_clock = gpu_info['mem_clock'].strip('MHz').strip()
pci_gen = gpu_info['pci_gen']
pci_speed = gpu_info['pci_speed'].strip('x').strip()
#deal with N/A value
if graphics_clock=='N/A' or mem_clock=='N/A':
return None
if (pci_gen=='N/A') or ('Error' in pci_gen):
pci_gen='3' #when the value is N/A, assume PCIE3.0*16
if '1'==pci_gen:
pci_gen = '1.1'
if '2'==pci_gen:
pci_gen = '2.0'
if '3'==pci_gen:
pci_gen = '3.0'
if pci_speed not in ('1','4','8','16'):
pci_speed='16'
#################################################################
gpu_table = manho_table['gpu_table']
gpuname=gpu_info['name'] # gpu_info['name'] is an official GPU name
gpuname=gpuname.replace('\x20','').upper() # delete space char for 1660Ti ~ 1660 Ti
# #手动处理特殊情况
# if gpuname.endswith('SUPER') :
# gpuname = gpuname.replace('SUPER','S') #manho's GPU name is not an official name
if gpuname in gpu_table.keys():
gpuid = gpu_table[gpuname]
else:
raise Exception( 'can not find your GPU id on fah.manho.org, exit...' )
#################################################################
os_table = manho_table['os_table']
if os_info['name'] in os_table.keys():
os_id=os_table[ os_info['name'] ]
else:
raise Exception( 'can not find your OS id on fah.manho.org, exit...' )
if os_info['arch'] == 'AMD64' or os_info['arch']=='x86_64':
arch='64'
else:
arch='32'
#################################################################
core_ver = WU_info['core'].strip('0x')
project_num = WU_info['project_num']
tpf_min = str(WU_info['tpf_min'])
tpf_sec = str(WU_info['tpf_sec'])
return {'user':user,
'team':team,
'gpuid':gpuid,
'corever':core_ver,
'projectnum':project_num,
'tpfmin':tpf_min,
'tpfsec':tpf_sec,
'driver':driver,
'gpucoreclock':graphics_clock,
'gpumemclock':mem_clock,
'pciever':pci_gen,
'pciespeed':pci_speed,
'os':os_id,
'arch':arch
}
#end def
def post_form(form_para)-> int:
if form_para == None:
return 1
user = form_para['user']
team = form_para['team']
post_body = form_para
post_body['submit'] = ''
post_body['auto'] = '1'
params = urllib.parse.urlencode(post_body)
cookie = 'username='+user+'; team='+team
header = {'Content-type': 'application/x-www-form-urlencoded', 'Cookie': cookie}
html = '' # html needs a initial value
try:
conn = http.client.HTTPSConnection('fah.manho.org', timeout=10)
conn.request('POST', '/gpu_statistics.php?a=add', params, header)
resp = conn.getresponse()
if resp.status != 200:
print('===========HTTP response code is not 200 !===========')
return 2
resp_data = resp.read()
html = resp_data.decode('utf-8')
conn.close()
except:
t, v, _ = sys.exc_info()
print(t, v)
print('网络异常,本次提交失败。下一次继续重试...')
return 3
#print( html ) #debug
if ('您输入的数据已经成功提交' in html) or ('未找到符合用户名的记录' in html):
logging.info('===========Submit OK ! ===========')
logging.info(post_body)
print('===========Submit OK ! ===========')
return 0
else:
print(html[:100])
print('===========Submit Error!===========')
return 4
#end def
# def split_log(log_lines, FS_index):
# slot_log = {}
# for FS in FS_index.keys():
# slot_log[FS] = []
# index_min = min( FS_index.values() )
# c = len(log_lines)
# for i in range(index_min, c-1):
# line = log_lines[i]
# item = log_lines[i].split(':')
# if(len(item)<5):
# continue
# else:
# if item[4].startswith('FS') and item[3].startswith('WU'):
# FSxx = item[4]
# slot_log[FSxx].append(line)
# else:
# #print(line) #debug
# continue
# for FS, log in slot_log.items():
# slot_log[FS] = get_last_starting(log)
# return slot_log
# #end def
# def get_last_starting(FS_log):
# c=len(FS_log)
# for i in range(c-1, -1 ,-1):
# if FS_log[i].endswith(':Starting'):
# return FS_log[i:]
# return None
# #end def
def do_slot_log(log_lines, index: int, user: str, team: str, os_info, gpu_info_list, manho_table):
global FAH_GPU_CORES
global submit_db
WU_info = get_WU_info(log_lines, index)
FSxx = WU_info['slot']
core = WU_info['core']
project_num = WU_info['project_num']
project = WU_info['project']
print('='*60)
print('%20s'%'Slot ID:',FSxx)
print('%20s'%'Core:',core)
print('%20s'%'Project:',project_num)
print('%20s'%'Project(RCG):',project)
time_step_array = WU_info['time_step_array'] #get_WU_time_and_steps(lines)
if len(time_step_array) < 5 :
print('data is not enough! skip...')
return -1
step0,stepx,t0,tx,tpf_min,tpf_sec = compute_TPF(time_step_array)
print('%20s'%'progress:' , [step0,stepx] )
print('%20s'%'running sec:', [t0,tx] )
print('%20s'%'TPF:',tpf_min,'min',tpf_sec,'sec')
WU_info['tpf_min']=tpf_min
WU_info['tpf_sec']=tpf_sec
if core not in FAH_GPU_CORES :
print('skip cpu slot...')
return -2 #skip cpu slot
# if len(gpu_info_list) == 1 : #only cope with one GPU
# gpu_info = gpu_info_list[0] #bug!
# else:
#'需要找到本slot对应的GPU'
#'按PID寻找GPU'
core_PID = WU_info['core_PID']
gpu_info = None
for ginfo in gpu_info_list:
#print( 'keys:',ginfo['pid_list'].keys() ) #debug
if core_PID in ginfo['pid_list'].keys() :
gpu_info = ginfo
#'找到了!'
if gpu_info is None :
#print('Can not find GPU running on process #'+str(core_PID))
#return -1
gpu_info = gpu_info_list[0]
#end if
print('%20s'%'GPU:' , gpu_info['name'])
print('%20s'%'GPU Driver:', gpu_info['driver'])
print('%20s'%'GPU Clock:' , gpu_info['graphics_clock'])
print('%20s'%'GMem Clock:', gpu_info['mem_clock'])
print('%20s'%'pci_gen:' , gpu_info['pci_gen'])
print('%20s'%'pci_speed:' , gpu_info['pci_speed'])
print('%20s'%'pci_bus:' , gpu_info['pci_bus'] )
sys.stdout.flush()
if project in submit_db:
print( 'No results need to be submitted. Sleeping...' )
return 0
else:
formXXX = fill_form(user,team, WU_info, gpu_info, os_info, manho_table)
ret = post_form( formXXX ) #send to fah.manho.org
if ret==0 : #submit OK!
submit_db.add(project)
print('%20s'%'submit_db:', (project in submit_db))
return 0
#end def
def auto_ppd_submit_main():
############################################################################
print('-'*80)
print('Starting some check...')
print('--check fah.manho.org...')
manho_table = get_manho_table()
if manho_table is None:
print('can not visit fah.manho.org, try to submit result later')
return
print('--check fah.manho.org: Done')
print('--check GPU and its status...')
gpu_info_list = get_gpu_info()
if len(gpu_info_list) < 1: raise Exception('No GPU in your system! exit...')
print('--check GPU and its status: Done')
print('--check fah log existence...')
f = search_fah_log()
print('--check fah log existence: Done')
############################################################################
print('Scanning fah log...')
print('-'*80)
log_lines, FS_index, cfg_index = read_log(f)
config = get_config(log_lines, cfg_index)
user = config['user']
team = config['team']
n_slots = config['num_slots']
os_info = get_os_info( )
print('%20s'%'User:' , user )
print('%20s'%'Team:' , team )
print('%20s'%'Total Slots:', n_slots )
print('%20s'%'OS:' , os_info['name'] )
print('%20s'%'OS Arch:' , os_info['arch'] )
print('%20s'%'Last config Index:', list(cfg_index) )
print('%20s'%'Last WU Index:' , FS_index ) #FS_index: last WU starting index for echo slot
if (len(FS_index) == 0):
print('### not enough data. sleep...')
return
# slots_log = split_log(log_lines, FS_index)
# # for FS, log in slots_log.items():
# for log in slots_log.values():
# # print('='*60)
# # print('FS:',FS)
# # print(''*80)
# # print('\r\n'.join(log))
# do_slot_log(log, user,team, os_info, gpu_info_list, manho_table )
for index in FS_index.values():
do_slot_log(log_lines, index, user, team, os_info, gpu_info_list, manho_table)
print('-'*80)
print(time.asctime(time.localtime()), '\n\n')
sys.stdout.flush()
#end def
def init():
try:
#set window title in Windows
if platform.system() == 'Windows' :
os.system('title auto_ppd_submit.py')
#set work dir
print('*'*80)
print('my name:',__file__)
pwd = os.path.split(os.path.realpath(__file__))[0]
print('pwd:',pwd)
os.chdir(pwd)
print('current dir:', os.getcwd() )
#check nvidia-smi tool
print('nvidia-smi:' , get_nv_smi() )
#check fah log
print('fah log file:', search_fah_log() )
#set my log
logging.basicConfig(filename='auto_ppd_submit.log', level=logging.DEBUG, format='[%(asctime)s] %(name)s:%(levelname)s: %(message)s' )
except:
t, v, errinfo = sys.exc_info()
print(t, v, '\nerror line:',errinfo.tb_lineno)
print('press enter to exit...')
sys.stdin.readline()
exit(-1)
#end try
#end def
def search_fah_log()-> str:
log_paths = [
'log.txt' , #current dir
str(os.getenv('SYSTEMDRIVE')) + r'\Program Files (x86)\FAHClient\log.txt' ,
str(os.getenv('SYSTEMDRIVE')) + r'\Users\root\AppData\Roaming\FAHClient\log.txt'
]
for p in log_paths:
if os.path.exists( p ) and os.path.isfile( p ) :
return p
print('#'*60)
print('')
print('can not find folding@home log file!')
print('please put auto_ppd_submit.py in folding@home work dir.' )
print('')
print('#'*60)
raise Exception('NO fah log file')
#end def
def main():
DEBUG = False
init()
########## main loop ##########
while True:
try:
auto_ppd_submit_main()
except:
t, v, errinfo = sys.exc_info()
print(t, v, '\nerror line:',errinfo.tb_lineno)
# never exit if error
#end try
try:
time.sleep(60 if not DEBUG else 1)
except:
t, v, errinfo = sys.exc_info()
print(t, v, '\nerror line:',errinfo.tb_lineno)
print('press enter to exit...')
sys.stdin.readline()
exit(-1)
#end try
#end while
# ##################################################################################################
FAH_GPU_CORES = ('0x15', '0x16', '0x17', '0x18', '0x19', '0x20', '0x21', '0x22')
submit_db = set()
if __name__ == '__main__':
main()