# -*- coding: utf-8 -*-
#######################
# file    : simple_git_download.py
# author  : nemon (nemon@aliyun.com)
# version : 1.0.0
# document: https://github.com/nemon-/simple_git_download/blob/master/README.md
# project : https://github.com/nemon-/simple_git_download
# licence : MIT License (https://github.com/nemon-/simple_git_download/blob/master/LICENSE)
#######################
"""Download a GitHub directory tree by scraping the repository web pages.

The script walks the HTML directory listings below a ``tree/master`` URL and
either writes a ``download.sh`` script of ``wget`` commands ('wget' mode) or
fetches every file itself ('down' mode).

The code is written to run on both Python 2 and Python 3: every ``print`` call
takes a single pre-formatted string, and ``urlopen`` is imported from whichever
location the running interpreter provides.
"""
import os
import sys
import urllib

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

try:  # Python 3
    from shlex import quote as _shell_quote
except ImportError:  # Python 2
    from pipes import quote as _shell_quote


class git_down():
    """Scrape GitHub HTML listings and download (or script) their files."""

    # NOTE(review): the three marker literals below were destroyed in the
    # checked-in copy of this file (everything between '<' and '>' had been
    # stripped from the source text). The values are best guesses at the
    # page markup and MUST be confirmed against the pages being scraped;
    # they are class attributes so they can be overridden without touching
    # the parsing code.
    LIST_START_MARKER = '<table'      # start of the directory listing
    LIST_END_MARKER = '</table>'      # end of the directory listing
    FILE_MARKER = 'id="raw-url"'      # only present on single-file pages

    def __init__(self):
        self._arr_src = None
        self._target_path = ['.']
        self._output_type = 'wget'
        self._src_root = r'https://github.com'

    def _toText(self, data):
        """Return *data* as text so it can be searched with ``str`` needles.

        On Python 3 ``bytes`` are decoded as UTF-8 (undecodable bytes are
        replaced). On Python 2 ``bytes`` is ``str``, so the value is returned
        unchanged.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode('utf-8', 'replace')
        return data

    def outFile(self, s, f='./log.log'):
        """Write *s* to the file *f* in binary mode.

        Text is encoded as UTF-8 first. Failures are reported on stdout and
        otherwise ignored (best effort), so one bad file does not stop a run.
        """
        print('write file:  %s' % (f,))
        try:
            if not isinstance(s, bytes):
                s = s.encode('utf-8')
            with open(f, 'wb') as handle:
                handle.write(s)
        except Exception as e:
            print('ERROR ! %s' % (e,))

    def creatDir(self, arry_path):
        """Create the directory named by the path components *arry_path*."""
        work_path = '/'.join(arry_path)
        # An empty path means "current directory": nothing to create.
        if work_path and not os.path.exists(work_path):
            os.makedirs(work_path)

    def getUrlContent(self, work_url):
        """Fetch *work_url* and return the raw response body.

        Returns an empty byte string when the request fails; the error is
        printed rather than raised so that a crawl can continue.
        """
        print('open url content:  %s' % (work_url,))
        s = b''
        try:
            sf = urlopen(work_url)
            try:
                s = sf.read()
            finally:
                sf.close()
        except Exception as e:
            print('ERROR ! %s' % (e,))
        return s

    def getMinTagStart(self, html_str, sTag):
        """Return the index of the first ``<sTag>`` or ``<sTag ``, or -1."""
        i1_1 = html_str.find('<' + sTag + '>')
        i1_2 = html_str.find('<' + sTag + ' ')
        if i1_1 < 0:
            return i1_2
        elif i1_2 < 0:
            return i1_1
        else:
            return min(i1_1, i1_2)

    def getStrsByTag(self, html_str, sTag):
        """Return every ``<sTag ...>...</sTag>`` substring of *html_str*.

        Elements are matched from an opening tag to the next closing tag, so
        nested elements of the same tag are not supported. An element without
        a closing tag ends the scan and is not returned.
        """
        close_tag = '</' + sTag + '>'
        a = []
        tmp_s = html_str
        i1 = self.getMinTagStart(tmp_s, sTag)
        while i1 >= 0:
            tmp_s = tmp_s[i1:]
            i2 = tmp_s.find(close_tag)
            if i2 < 0:
                # Unterminated element: slicing with -1 would yield garbage.
                break
            end = i2 + len(close_tag)
            a.append(tmp_s[0:end])
            tmp_s = tmp_s[end:]
            i1 = self.getMinTagStart(tmp_s, sTag)
        return a

    def getHrefAndName(self, tagA):
        """Return ``(href, name)`` extracted from the anchor markup *tagA*.

        ``href`` is the value of the first ``href="..."`` attribute. ``name``
        is the text between the last ``>`` that precedes the last ``</a`` and
        that ``</a``. Returns ``('', '')`` when no complete ``href`` attribute
        is present.
        """
        marker = 'href="'
        i1 = tagA.find(marker)
        if i1 < 0:
            return ('', '')
        tmps = tagA[i1 + len(marker):]
        i2 = tmps.find('"')
        if i2 < 0:
            return ('', '')
        s_href = tmps[0:i2]
        tmps = tmps[i2:]
        i3 = tmps.rfind('</a')
        if i3 >= 0:
            tmps = tmps[0:i3]
        i4 = tmps.rfind('>')
        s_name = tmps[i4 + 1:]
        return (s_href, s_name)

    def getList(self, htmls):
        """Return ``[(href, name), ...]`` for the entries of a listing page.

        Only anchors carrying ``class="js-navigation-open"`` inside a
        ``tbody`` of the listing section are returned. Returns an empty list
        when the listing start marker is not found.
        """
        i1 = htmls.find(self.LIST_START_MARKER)
        if i1 < 0:
            return []
        h2 = htmls[i1:]
        i2 = h2.find(self.LIST_END_MARKER)
        # Without an end marker, search the remainder of the page.
        h3 = h2 if i2 < 0 else h2[0:i2 + len(self.LIST_END_MARKER)]
        h4 = ''.join(self.getStrsByTag(h3, 'tbody'))
        anchors = self.getStrsByTag(h4, 'a')
        return [self.getHrefAndName(x) for x in anchors
                if x.find('class="js-navigation-open"') >= 0]

    def getFName(self, url):
        """Return the last path component of *url*, ignoring one trailing '/'."""
        if not url:
            return url
        work_path = url if url[-1] != '/' else url[0:-1]
        i1 = work_path.rfind('/')
        name = work_path if i1 < 0 else work_path[i1 + 1:]
        return name

    def findAllList(self, work_url, arry_tgt, str_type, s_root):
        """Recursively collect everything reachable below *work_url*.

        Args:
            work_url: site-relative URL to visit (appended to *s_root*).
            arry_tgt: local target path components for this URL. The list is
                extended and restored in place while recursing; every result
                carries its own copy.
            str_type: output type; passed through to recursive calls only.
            s_root: scheme and host prefix, e.g. ``https://github.com``.

        Returns:
            A list of ``(kind, target_components, url)`` tuples where *kind*
            is ``'file'`` or ``'path'``.
        """
        agl = []
        url = s_root + work_url
        htmls = self._toText(self.getUrlContent(url))
        ipos = htmls.find(self.FILE_MARKER)
        if ipos >= 0:
            # Back up to the start of the tag holding the marker so that the
            # href is found regardless of the attribute order inside the tag.
            tag_start = max(htmls.rfind('<', 0, ipos + 1), 0)
            tmpa = self.getHrefAndName(htmls[tag_start:])
            file_url = s_root + tmpa[0]
            agl.append(('file', arry_tgt[::], file_url))
            print('find file: ' + file_url)
        else:
            agl.append(('path', arry_tgt[::], url))
            print('search path: ' + url)
            for href, name in self.getList(htmls):
                # Names come from remote HTML and become local path
                # components: never follow empty or parent/self entries.
                if name in ('', '.', '..'):
                    continue
                arry_tgt.append(name)
                agl.extend(self.findAllList(href, arry_tgt, str_type, s_root))
                arry_tgt.pop()
        return agl

    def main(self, setting=None):
        """Crawl the source described by *setting* and emit the result.

        *setting* is a dict with the keys ``arr_src`` (site-relative start
        URL), ``target_path`` (list of local path components),
        ``output_type`` and ``s_src_root`` (scheme and host).

        ``output_type`` ``'wget'`` writes ``download.sh`` into the target
        path, ``'down'`` creates the directories and downloads the files, and
        any other value prints the collected entries.

        Raises:
            Exception: if *setting* is missing or empty.
        """
        if not setting:
            raise Exception('parameter error.')
        agl = self.findAllList(setting['arr_src'], setting['target_path'],
                               setting['output_type'], setting['s_src_root'])
        if setting['output_type'] == 'wget':
            path = setting['target_path'][::]
            path.append('download.sh')
            lines = []
            for item in agl:
                # Targets and URLs originate from scraped HTML; quote them so
                # they cannot inject shell syntax into the generated script.
                target = _shell_quote('/'.join(item[1]))
                if item[0] == 'path':
                    lines.append('mkdir -p ' + target)
                else:
                    # NOTE(review): for files found while recursing, item[1]
                    # already ends with the file name, so "-P" receives a file
                    # path rather than a directory - confirm the intent.
                    lines.append('wget -P ' + target + ' ' +
                                 _shell_quote(item[2]))
            script = '\n'.join(lines)
            if not isinstance(script, bytes):
                script = script.encode('utf-8')
            with open('/'.join(path), 'wb') as f_sh:
                f_sh.write(script)
        elif setting['output_type'] == 'down':
            for item in agl:
                if item[0] == 'path':
                    self.creatDir(item[1])
                elif item[0] == 'file':
                    self.outFile(self.getUrlContent(item[2]),
                                 '/'.join(item[1]))
                else:
                    print('ERROR: %s %s %s'
                          % (item[0], '/'.join(item[1]), item[2]))
        else:
            print('')
            for i in agl:
                print(i)


def showCLIHelp(err_msg=None):
    """Print the optional *err_msg* followed by the command line usage."""
    if err_msg:
        print(err_msg)
    print("USAGE:")
    print("python simple_git_download.py src_url target_path [down|wget]")
    print("EXAMPLE:")
    print("python simple_git_download.py https://github.com/nemon-/simple_git_download.git . down")


def parseCLIArgs(argv):
    """Turn the command line *argv* into a setting dict for ``git_down.main``.

    Returns:
        ``(setting, None)`` on success, or ``(None, message)`` when the
        arguments are unusable; *message* is ``None`` when fewer than two
        arguments were given (plain usage help).
    """
    if len(argv) < 3:
        return (None, None)
    para_src = argv[1]
    para_tgt = argv[2]
    para_typ = 'wget'
    if len(argv) >= 4 and argv[3].lower() in ('wget', 'down'):
        para_typ = argv[3].lower()

    arry_tgt = para_tgt.split(r'/')
    if arry_tgt[-1] == '':
        arry_tgt.pop()

    arry_src = para_src.split(r'/')
    if len(arry_src) < 4:
        return (None, 'MUST WITH project name.')
    s_src_root = arry_src[0] + r'//' + arry_src[2]  # 'https://github.com'
    arry_src = arry_src[3:]
    # Drop the empty components a trailing '/' leaves behind before
    # validating, so "owner/repo/" is treated like "owner/repo".
    while arry_src and arry_src[-1] == '':
        arry_src.pop()
    if len(arry_src) < 2:
        return (None, 'MUST WITH project name.')
    if len(arry_src) == 2:
        if arry_src[1][-4:] == '.git':
            arry_src[1] = arry_src[1][0:-4]
        arry_src.extend(('tree', 'master'))
    elif len(arry_src) == 3 or arry_src[2] != 'tree':
        return (None, 'MUST WITH tree/master .')
    elif arry_src[3] != 'master':
        return (None, 'MUST WITH tree/master ..')
    arry_src[0] = '/' + arry_src[0]
    setting = {
        'arr_src': '/'.join(arry_src),
        'target_path': arry_tgt,
        'output_type': para_typ,
        's_src_root': s_src_root,
    }
    return (setting, None)


if __name__ == '__main__':
    cli_setting, cli_error = parseCLIArgs(sys.argv)
    if cli_setting is None:
        showCLIHelp(cli_error)
    else:
        git_down().main(cli_setting)