import numpy as np
import requests
import re
import pandas as pd
# Requests each chinabond.com.cn listing page in `urlsa`, runs a regex over the
# returned text to collect links, prefixes them with the site root and
# requests each resulting URL in turn.
# NOTE(review): the indentation of this block is missing and the text from the
# second re.compile( call onward is truncated, so the loop nesting and the end
# of this function cannot be determined from here -- restore from the original
# file before relying on it.
def xlsxget():
urlsa = ['https://www.chinabond.com.cn/cb/cn/zzsj/zzjgcp/cpxz/qxxz/zzgzqx/list_1.shtml',
'https://www.chinabond.com.cn/cb/cn/zzsj/zzjgcp/cpxz/qxxz/zzgzqx/list_2.shtml']
# Browser-like User-Agent sent with every request below.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'}
# NOTE(review): `count` is not read anywhere in the visible lines.
count = -1
for urla in urlsa:
res = requests.get(urla, headers=headers)
# Decode the body using the encoding requests detects from the content.
res.encoding = res.apparent_encoding
texta = res.text
# NOTE(review): `pattern_a` is compiled but not referenced in the visible
# lines, and both patterns here are the bare '.*?' -- presumably the original
# expressions were lost; confirm against the original file.
pattern_a = re.compile('.*?',
re.S)
before = 'https://www.chinabond.com.cn/'
urlsb = re.findall('''.*?''',
texta, re.S)
# NOTE(review): `finallist` is not used in the visible lines.
finallist = []
# Prefix every match with the site root.
for i in range(len(urlsb)):
urlsb[i] = before + urlsb[i]
# Request each prefixed URL and decode it the same way as the listing page.
for i in urlsb:
res1 = requests.get(i, headers=headers)
res1.encoding = res1.apparent_encoding
textb = res1.text
# NOTE(review): the next line opens a string literal that is never closed and
# runs straight into statements operating on `ff5`; those statements look like
# the tail of a different definition whose header is not visible here.
pattern_b = re.compile('=2010]
ff5=ff5.iloc[:,:-1]
ff5['date']=ff5['date'].apply(lambda x:x.strftime('%Y-%m-%d'))
ff5.to_excel('ff5.xlsx')
def ff5_code_ten():
    """Inner-join the factor table with the per-code return table on date.

    Reads ``ff5.xlsx`` and columns 0, 1, 2 and 4 of ``汇总1.xlsx``, relabels
    the columns of both tables, parses each ``date`` column as datetimes,
    inner-joins the two tables on ``date`` and writes the result to
    ``汇总2.xlsx``.
    """
    factor_names = ['date', 'mkt_rf', 'smb', 'hml', 'umd', 'rmw', 'cma']
    return_names = ['code', 'date', 'code_return', 'ten_return']

    factors = pd.read_excel('ff5.xlsx')
    returns = pd.read_excel('汇总1.xlsx', usecols=[0, 1, 2, 4])
    factors.columns = factor_names
    returns.columns = return_names

    # Both sides need a real datetime key so the join compares like with like.
    for table in (factors, returns):
        table['date'] = pd.to_datetime(table['date'])

    merged = factors.merge(returns, on='date', how='inner')
    merged.to_excel('汇总2.xlsx')
def test():
    """Print the inner join on column ``a`` of two small sample frames."""
    left_frame = pd.DataFrame({'a': [1, 2], 'b': [5, 6]})
    right_frame = pd.DataFrame({'a': [2, 1, 0], 'y': [6, 7, 8]})
    # Only keys present in both frames (1 and 2) survive an inner join.
    joined = left_frame.merge(right_frame, how='inner', on='a')
    print(joined)
# Script entry point: build the merged workbook only when run directly.
if __name__ == "__main__":
    ff5_code_ten()