"""Scrape chinabond.com.cn listing pages and merge factor/return workbooks.

NOTE(review): this file was recovered from a copy in which the whole module
had been collapsed onto one line and every span that looked like markup
(text between ``<`` and ``>``) appears to have been stripped.  Complete
definitions are reproduced below with conventional formatting; the damaged
regions are kept verbatim in comments and must be restored from the
original script.
"""
import re

import numpy as np
import pandas as pd
import requests


def xlsxget():
    """Download the chinabond listing pages and the pages they link to.

    Only the beginning of this function survived in the recovered source.
    The visible part requests each listing page, extracts strings from it
    with a regular expression, prefixes them with the site root and starts
    requesting each resulting URL.  Everything after the first download of
    a linked page is missing.

    NOTE(review): both regex literals read ``'.*?'`` in the recovered text;
    presumably the HTML tags and capture groups they contained were stripped
    along with the rest of the markup-like text.  Restore them before use.

    Raises:
        NotImplementedError: after the first linked page has been fetched,
            because the processing code that followed is not recoverable.
    """
    urlsa = [
        'https://www.chinabond.com.cn/cb/cn/zzsj/zzjgcp/cpxz/qxxz/zzgzqx/list_1.shtml',
        'https://www.chinabond.com.cn/cb/cn/zzsj/zzjgcp/cpxz/qxxz/zzgzqx/list_2.shtml',
    ]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'}
    # ``count``, ``pattern_a`` and ``finallist`` are never read in the visible
    # code; they are kept because the lost remainder may depend on them.
    count = -1
    for urla in urlsa:
        res = requests.get(urla, headers=headers)
        # Let requests guess the charset from the body instead of the headers.
        res.encoding = res.apparent_encoding
        texta = res.text
        pattern_a = re.compile('.*?', re.S)
        before = 'https://www.chinabond.com.cn/'
        urlsb = re.findall('''.*?''', texta, re.S)
        finallist = []
        # Turn every extracted string into an absolute URL.
        urlsb = [before + path for path in urlsb]
        # NOTE(review): nesting of this loop inside the listing-page loop is
        # inferred from the data flow (``urlsb`` is rebuilt per listing page);
        # the recovered text carried no indentation.
        for i in urlsb:
            res1 = requests.get(i, headers=headers)
            res1.encoding = res1.apparent_encoding
            textb = res1.text
            # --- recovered source is damaged from here on -----------------
            # The next statement survives only as:
            #     pattern_b = re.compile('
            # followed directly by text that belongs to a later definition.
            raise NotImplementedError(
                'xlsxget: the remainder of this function was lost in the '
                'recovered source and must be restored from the original')


# NOTE(review): the following text sat between the truncated ``xlsxget`` and
# ``ff5_code_ten`` in the recovered source.  It looks like the tail of a
# separate definition that filters a frame named ``ff5`` and writes
# 'ff5.xlsx', but its ``def`` line and opening statements are missing, so it
# is preserved verbatim here rather than reconstructed:
#
#     =2010] ff5=ff5.iloc[:,:-1]
#     ff5['date']=ff5['date'].apply(lambda x:x.strftime('%Y-%m-%d'))
#     ff5.to_excel('ff5.xlsx')


def ff5_code_ten():
    """Join the factor workbook with the return workbook on ``date``.

    Reads 'ff5.xlsx' and columns 0, 1, 2 and 4 of '汇总1.xlsx' from the
    current working directory, assigns fixed column names to both frames,
    parses their ``date`` columns as datetimes, inner-merges them on
    ``date`` and writes the result to '汇总2.xlsx'.

    The column assignments require 'ff5.xlsx' to contain exactly seven
    columns and the selection from '汇总1.xlsx' exactly four; pandas raises
    ``ValueError`` on a length mismatch.
    """
    ff5 = pd.read_excel('ff5.xlsx')
    code_ten = pd.read_excel('汇总1.xlsx', usecols=[0, 1, 2, 4])

    ff5.columns = ['date', 'mkt_rf', 'smb', 'hml', 'umd', 'rmw', 'cma']
    code_ten.columns = ['code', 'date', 'code_return', 'ten_return']

    # Both keys must share a dtype for the merge to match rows.
    ff5['date'] = pd.to_datetime(ff5['date'])
    code_ten['date'] = pd.to_datetime(code_ten['date'])

    final = pd.merge(left=ff5, right=code_ten, on='date', how='inner')
    final.to_excel('汇总2.xlsx')


def test():
    """Print the inner merge of two small frames on column ``a``.

    Only the keys present in both frames (1 and 2) appear in the output.
    """
    df1 = pd.DataFrame({'a': [1, 2], 'b': [5, 6]})
    df2 = pd.DataFrame({'a': [2, 1, 0], 'y': [6, 7, 8]})
    df3 = pd.merge(left=df1, right=df2, how='inner', on='a')
    print(df3)


if __name__ == '__main__':
    ff5_code_ten()