华东理工大学《Python与金融计算》

单因子资产定价模型的实证检验(多资产)

蒋志强 2022-03-17 18:00-21:00

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import chi2, f
import warnings
warnings.filterwarnings("ignore")
In [6]:
stock_data1 = pd.read_csv('1EShowData_data_5stocks_daily_price_2001-2010.csv', 
                          encoding='GB2312', 
                          usecols=[0, 1, 5, 10])
stock_data1
Out[6]:
股票代码_Stkcd 日期_Date 收盘价_Clpr 日无风险收益率_DRfRet
0 568 2001/1/2 12.70 0.000054
1 568 2001/1/3 13.00 0.000054
2 568 2001/1/4 12.94 0.000054
3 568 2001/1/5 13.41 0.000054
4 568 2001/1/8 13.38 0.000054
... ... ... ... ...
12179 600809 2010/12/27 66.13 0.000125
12180 600809 2010/12/28 67.47 0.000126
12181 600809 2010/12/29 69.08 0.000127
12182 600809 2010/12/30 68.00 0.000127
12183 600809 2010/12/31 68.53 0.000127

12184 rows × 4 columns

In [7]:
stock_data2 = pd.read_csv('1EShowData_data_5stocks_daily_price_2011-2015.csv',
                         encoding='GB2312',
                         usecols=[0, 1, 5, 10])
stock_data2
Out[7]:
股票代码_Stkcd 日期_Date 收盘价_Clpr 日无风险收益率_DRfRet
0 568 2011/1/4 40.73 0.000125
1 568 2011/1/5 39.35 0.000124
2 568 2011/1/6 38.66 0.000122
3 568 2011/1/7 38.71 0.000121
4 568 2011/1/10 38.47 0.000119
... ... ... ... ...
6105 600809 2015/12/25 18.80 0.000084
6106 600809 2015/12/28 18.52 0.000085
6107 600809 2015/12/29 18.76 0.000085
6108 600809 2015/12/30 18.90 0.000085
6109 600809 2015/12/31 19.27 0.000085

6110 rows × 4 columns

In [8]:
stock_data3 = pd.read_csv('1EShowData_data_5stocks_daily_price_2016-2018.csv',
                         encoding='GB2312',
                         usecols=[0, 1, 5, 10])
stock_data3
Out[8]:
股票代码_Stkcd 日期_Date 收盘价_Clpr 日无风险收益率_DRfRet
0 568 2016/1/4 24.99 0.000085
1 568 2016/1/5 25.40 0.000085
2 568 2016/1/6 25.93 0.000084
3 568 2016/1/7 23.98 0.000084
4 568 2016/1/8 24.16 0.000084
... ... ... ... ...
3663 600809 2018/12/24 36.85 0.000088
3664 600809 2018/12/25 36.44 0.000090
3665 600809 2018/12/26 35.66 0.000091
3666 600809 2018/12/27 35.03 0.000091
3667 600809 2018/12/28 35.05 0.000092

3668 rows × 4 columns

In [9]:
stock_data = stock_data1.append(stock_data2, ignore_index=True)
stock_data
Out[9]:
股票代码_Stkcd 日期_Date 收盘价_Clpr 日无风险收益率_DRfRet
0 568 2001/1/2 12.70 0.000054
1 568 2001/1/3 13.00 0.000054
2 568 2001/1/4 12.94 0.000054
3 568 2001/1/5 13.41 0.000054
4 568 2001/1/8 13.38 0.000054
... ... ... ... ...
18289 600809 2015/12/25 18.80 0.000084
18290 600809 2015/12/28 18.52 0.000085
18291 600809 2015/12/29 18.76 0.000085
18292 600809 2015/12/30 18.90 0.000085
18293 600809 2015/12/31 19.27 0.000085

18294 rows × 4 columns

In [10]:
stock_data = stock_data.append(stock_data3, ignore_index=True)
stock_data
Out[10]:
股票代码_Stkcd 日期_Date 收盘价_Clpr 日无风险收益率_DRfRet
0 568 2001/1/2 12.70 0.000054
1 568 2001/1/3 13.00 0.000054
2 568 2001/1/4 12.94 0.000054
3 568 2001/1/5 13.41 0.000054
4 568 2001/1/8 13.38 0.000054
... ... ... ... ...
21957 600809 2018/12/24 36.85 0.000088
21958 600809 2018/12/25 36.44 0.000090
21959 600809 2018/12/26 35.66 0.000091
21960 600809 2018/12/27 35.03 0.000091
21961 600809 2018/12/28 35.05 0.000092

21962 rows × 4 columns

In [11]:
stock_data.columns = ['code', 'date', 'close', 'rfreturn']
stock_data.dropna(inplace=True)
stock_data.sort_values(by=['code'], inplace=True)
stock_data
Out[11]:
code date close rfreturn
0 568 2001/1/2 12.70 0.000054
12692 568 2013/1/24 32.77 0.000106
12693 568 2013/1/25 31.66 0.000106
12694 568 2013/1/28 30.11 0.000106
12695 568 2013/1/29 30.46 0.000106
... ... ... ... ...
17077 600809 2011/1/13 62.16 0.000114
17076 600809 2011/1/12 60.80 0.000115
17075 600809 2011/1/11 61.38 0.000117
17157 600809 2011/5/17 70.00 0.000125
21961 600809 2018/12/28 35.05 0.000092

21074 rows × 4 columns

In [12]:
stk_codes = np.unique(stock_data['code'].values)
stk_codes
Out[12]:
array([   568,    596,    799,    858, 600809])
In [20]:
stock_data50 = stock_data[stock_data['code'] == stk_codes[0]]
stock_data50['date'] = pd.to_datetime(stock_data50['date'])
stock_data50.sort_values(by=['date'], inplace=True)
stock_data50['return'] = np.log(stock_data50['close']) - np.log(stock_data50['close'].shift(periods=1))
stock_data50.dropna(inplace=True)
ind = (stock_data50['return'] >= -0.1) & (stock_data50['return'] <= 0.1)
stock_data50 = stock_data50.loc[ind, :]
stock_data50
plt.plot(stock_data50['return'].values)
Out[20]:
[<matplotlib.lines.Line2D at 0x7fc7698802d0>]
In [21]:
stock_data51 = stock_data[stock_data['code'] == stk_codes[1]]
stock_data51['date'] = pd.to_datetime(stock_data51['date'])
stock_data51.sort_values(by=['date'], inplace=True)
stock_data51['return'] = np.log(stock_data51['close']) - np.log(stock_data51['close'].shift(periods=1))
stock_data51.dropna(inplace=True)
ind = (stock_data51['return'] >= -0.1) & (stock_data51['return'] <= 0.1)
stock_data51 = stock_data51.loc[ind, :]
stock_data51
plt.plot(stock_data51['return'].values)
Out[21]:
[<matplotlib.lines.Line2D at 0x7fc7697f0590>]
In [22]:
stock_data52 = stock_data[stock_data['code'] == stk_codes[2]]
stock_data52['date'] = pd.to_datetime(stock_data52['date'])
stock_data52.sort_values(by=['date'], inplace=True)
stock_data52['return'] = np.log(stock_data52['close']) - np.log(stock_data52['close'].shift(periods=1))
stock_data52.dropna(inplace=True)
ind = (stock_data52['return'] >= -0.1) & (stock_data52['return'] <= 0.1)
stock_data52 = stock_data52.loc[ind, :]
stock_data52
plt.plot(stock_data52['return'].values)
Out[22]:
[<matplotlib.lines.Line2D at 0x7fc76975f850>]
In [23]:
stock_data53 = stock_data[stock_data['code'] == stk_codes[3]]
stock_data53['date'] = pd.to_datetime(stock_data53['date'])
stock_data53.sort_values(by=['date'], inplace=True)
stock_data53['return'] = np.log(stock_data53['close']) - np.log(stock_data53['close'].shift(periods=1))
stock_data53.dropna(inplace=True)
ind = (stock_data53['return'] >= -0.1) & (stock_data53['return'] <= 0.1)
stock_data53 = stock_data53.loc[ind, :]
stock_data53
plt.plot(stock_data53['return'].values)
Out[23]:
[<matplotlib.lines.Line2D at 0x7fc76974fb10>]
In [24]:
stock_data54 = stock_data[stock_data['code'] == stk_codes[4]]
stock_data54['date'] = pd.to_datetime(stock_data54['date'])
stock_data54.sort_values(by=['date'], inplace=True)
stock_data54['return'] = np.log(stock_data54['close']) - np.log(stock_data54['close'].shift(periods=1))
stock_data54.dropna(inplace=True)
ind = (stock_data54['return'] >= -0.1) & (stock_data54['return'] <= 0.1)
stock_data54 = stock_data54.loc[ind, :]
stock_data54
plt.plot(stock_data54['return'].values)
Out[24]:
[<matplotlib.lines.Line2D at 0x7fc7696c2dd0>]
In [26]:
stock_data50
Out[26]:
code date close rfreturn return
1 568 2001-01-03 13.00 0.000054 0.023347
2 568 2001-01-04 12.94 0.000054 -0.004626
3 568 2001-01-05 13.41 0.000054 0.035677
4 568 2001-01-08 13.38 0.000054 -0.002240
5 568 2001-01-09 13.01 0.000054 -0.028043
... ... ... ... ... ...
19022 568 2018-12-24 40.16 0.000088 0.012024
19023 568 2018-12-25 40.53 0.000090 0.009171
19024 568 2018-12-26 39.69 0.000091 -0.020943
19025 568 2018-12-27 40.10 0.000091 0.010277
19026 568 2018-12-28 40.66 0.000092 0.013868

4234 rows × 5 columns

In [27]:
stock_data51
Out[27]:
code date close rfreturn return
2438 596 2001-01-03 28.88 0.000054 -0.008962
2439 596 2001-01-04 28.50 0.000054 -0.013245
2440 596 2001-01-05 27.92 0.000054 -0.020561
2441 596 2001-01-08 27.66 0.000054 -0.009356
2442 596 2001-01-09 27.25 0.000054 -0.014934
... ... ... ... ... ...
19759 596 2018-12-24 56.56 0.000088 0.015501
19760 596 2018-12-25 55.93 0.000090 -0.011201
19761 596 2018-12-26 54.24 0.000091 -0.030682
19762 596 2018-12-27 53.57 0.000091 -0.012429
19763 596 2018-12-28 53.96 0.000092 0.007254

4128 rows × 5 columns

In [29]:
stock_data52
Out[29]:
code date close rfreturn return
4878 799 2001-01-03 15.47 0.000054 0.004535
4879 799 2001-01-04 15.49 0.000054 0.001292
4880 799 2001-01-05 15.30 0.000054 -0.012342
4881 799 2001-01-08 15.34 0.000054 0.002611
4882 799 2001-01-09 15.20 0.000054 -0.009168
... ... ... ... ... ...
20491 799 2018-12-24 17.19 0.000088 0.006420
20492 799 2018-12-25 17.07 0.000090 -0.007005
20493 799 2018-12-26 16.98 0.000091 -0.005286
20494 799 2018-12-27 16.08 0.000091 -0.054460
20495 799 2018-12-28 15.98 0.000092 -0.006238

4102 rows × 5 columns

In [30]:
stock_data53
Out[30]:
code date close rfreturn return
7316 858 2001-01-03 40.20 0.000054 0.008745
7317 858 2001-01-04 40.18 0.000054 -0.000498
7318 858 2001-01-05 39.80 0.000054 -0.009502
7319 858 2001-01-08 38.89 0.000054 -0.023130
7320 858 2001-01-09 38.71 0.000054 -0.004639
... ... ... ... ... ...
21224 858 2018-12-24 50.96 0.000088 0.007089
21225 858 2018-12-25 50.86 0.000090 -0.001964
21226 858 2018-12-26 50.41 0.000091 -0.008887
21227 858 2018-12-27 50.15 0.000091 -0.005171
21228 858 2018-12-28 50.88 0.000092 0.014451

4209 rows × 5 columns

In [31]:
stock_data54
Out[31]:
code date close rfreturn return
9753 600809 2001-01-03 10.10 0.000054 0.007952
9754 600809 2001-01-04 9.94 0.000054 -0.015968
9755 600809 2001-01-05 10.15 0.000054 0.020907
9756 600809 2001-01-08 9.98 0.000054 -0.016891
9757 600809 2001-01-09 9.86 0.000054 -0.012097
... ... ... ... ... ...
21957 600809 2018-12-24 36.85 0.000088 0.006807
21958 600809 2018-12-25 36.44 0.000090 -0.011189
21959 600809 2018-12-26 35.66 0.000091 -0.021637
21960 600809 2018-12-27 35.03 0.000091 -0.017825
21961 600809 2018-12-28 35.05 0.000092 0.000571

4281 rows × 5 columns

In [35]:
stock1 = pd.read_csv('1EShowData_data_stock_daily_price_2001-2018.csv',
                    encoding='GB2312',
                    usecols=[1, 5])
stock1.columns = ['date', 'close']
stock1.dropna(inplace=True)
stock1['date'] = pd.to_datetime(stock1['date'])
stock1['return'] = np.log(stock1['close']) - np.log(stock1['close'].shift(periods=1))
stock1.dropna(inplace=True)
ind = (stock1['return'] >= -0.1) & (stock1['return'] <= 0.1)
stock1 = stock1.loc[ind, :]
stock1
Out[35]:
date close return
1 2001-08-28 36.86 0.036187
2 2001-08-29 36.38 -0.013108
3 2001-08-30 37.10 0.019598
4 2001-08-31 37.01 -0.002429
5 2001-09-03 36.99 -0.000541
... ... ... ...
4232 2018-12-24 568.00 0.001039
4233 2018-12-25 565.79 -0.003898
4234 2018-12-26 560.08 -0.010143
4235 2018-12-27 563.00 0.005200
4236 2018-12-28 590.01 0.046860

4125 rows × 3 columns

In [41]:
index = pd.read_csv('1EShowData_data_Index_daily_price_2001-2018.csv', 
                   encoding='GB2312',
                   usecols=[1, 5])
index.columns = ['date', 'close']
index['date'] = pd.to_datetime(index['date'])
index['return'] = np.log(index['close']) - np.log(index['close'].shift(periods=1))
stock1.dropna(inplace=True)
ind = (index['return'] >= -0.1) & (index['return'] <= 0.1)
index = index.loc[ind, :]
index
Out[41]:
date close return
1 2005-01-05 992.56 0.009892
2 2005-01-06 983.17 -0.009505
3 2005-01-07 983.96 0.000803
4 2005-01-10 993.88 0.010031
5 2005-01-11 997.13 0.003265
... ... ... ...
3397 2018-12-24 3038.20 0.002901
3398 2018-12-25 3017.28 -0.006909
3399 2018-12-26 3002.03 -0.005067
3400 2018-12-27 2990.51 -0.003845
3401 2018-12-28 3010.65 0.006712

3401 rows × 3 columns

In [38]:
stock_data
Out[38]:
code date close rfreturn
0 568 2001/1/2 12.70 0.000054
12692 568 2013/1/24 32.77 0.000106
12693 568 2013/1/25 31.66 0.000106
12694 568 2013/1/28 30.11 0.000106
12695 568 2013/1/29 30.46 0.000106
... ... ... ... ...
17077 600809 2011/1/13 62.16 0.000114
17076 600809 2011/1/12 60.80 0.000115
17075 600809 2011/1/11 61.38 0.000117
17157 600809 2011/5/17 70.00 0.000125
21961 600809 2018/12/28 35.05 0.000092

21074 rows × 4 columns

In [53]:
ret_rf = stock_data[['date', 'rfreturn']]
ret_rf.dropna(inplace=True)
ret_rf.drop_duplicates(inplace=True, subset=['date'])
ret_rf.sort_values(by=['date'], inplace=True)
ret_rf['date'] = pd.to_datetime(ret_rf['date'])
ret_rf
Out[53]:
date rfreturn
6 2001-01-10 0.000054
7 2001-01-11 0.000054
8 2001-01-12 0.000054
9 2001-01-15 0.000054
10 2001-01-16 0.000054
... ... ...
18948 2018-09-03 0.000079
18949 2018-09-04 0.000079
18950 2018-09-05 0.000078
18951 2018-09-06 0.000078
18952 2018-09-07 0.000078

4363 rows × 2 columns

stock1, stock_data50, stock_data51, stock_data_52, stock_data_53, stock_data_53, index, rf_return

In [43]:
data_matrix = pd.merge(left=index[['date', 'return']], 
                       right=stock1[['date', 'return']],
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[43]:
date return_x return_y
0 2005-01-05 0.009892 0.021442
1 2005-01-06 -0.009505 -0.017335
2 2005-01-07 0.000803 0.008163
3 2005-01-10 0.010031 0.035932
4 2005-01-11 0.003265 0.001306
... ... ... ...
3321 2018-12-24 0.002901 0.001039
3322 2018-12-25 -0.006909 -0.003898
3323 2018-12-26 -0.005067 -0.010143
3324 2018-12-27 -0.003845 0.005200
3325 2018-12-28 0.006712 0.046860

3326 rows × 3 columns

In [44]:
data_matrix = pd.merge(left=data_matrix, 
                       right=stock_data50[['date', 'return']],
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[44]:
date return_x return_y return
0 2005-01-05 0.009892 0.021442 0.014065
1 2005-01-06 -0.009505 -0.017335 -0.002797
2 2005-01-07 0.000803 0.008163 0.008368
3 2005-01-10 0.010031 0.035932 0.000000
4 2005-01-11 0.003265 0.001306 0.011050
... ... ... ... ...
3206 2018-12-24 0.002901 0.001039 0.012024
3207 2018-12-25 -0.006909 -0.003898 0.009171
3208 2018-12-26 -0.005067 -0.010143 -0.020943
3209 2018-12-27 -0.003845 0.005200 0.010277
3210 2018-12-28 0.006712 0.046860 0.013868

3211 rows × 4 columns

In [45]:
data_matrix = pd.merge(left=data_matrix, 
                       right=stock_data51[['date', 'return']],
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[45]:
date return_x return_y return_x return_y
0 2005-01-05 0.009892 0.021442 0.014065 0.019452
1 2005-01-06 -0.009505 -0.017335 -0.002797 -0.017668
2 2005-01-07 0.000803 0.008163 0.008368 0.007105
3 2005-01-10 0.010031 0.035932 0.000000 0.012313
4 2005-01-11 0.003265 0.001306 0.011050 0.001747
... ... ... ... ... ...
3043 2018-12-24 0.002901 0.001039 0.012024 0.015501
3044 2018-12-25 -0.006909 -0.003898 0.009171 -0.011201
3045 2018-12-26 -0.005067 -0.010143 -0.020943 -0.030682
3046 2018-12-27 -0.003845 0.005200 0.010277 -0.012429
3047 2018-12-28 0.006712 0.046860 0.013868 0.007254

3048 rows × 5 columns

In [46]:
data_matrix = pd.merge(left=data_matrix, 
                       right=stock_data52[['date', 'return']],
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[46]:
date return_x return_y return_x return_y return
0 2005-01-05 0.009892 0.021442 0.014065 0.019452 0.011800
1 2005-01-06 -0.009505 -0.017335 -0.002797 -0.017668 -0.008837
2 2005-01-07 0.000803 0.008163 0.008368 0.007105 0.000000
3 2005-01-10 0.010031 0.035932 0.000000 0.012313 0.049072
4 2005-01-11 0.003265 0.001306 0.011050 0.001747 0.005618
... ... ... ... ... ... ...
2857 2018-12-24 0.002901 0.001039 0.012024 0.015501 0.006420
2858 2018-12-25 -0.006909 -0.003898 0.009171 -0.011201 -0.007005
2859 2018-12-26 -0.005067 -0.010143 -0.020943 -0.030682 -0.005286
2860 2018-12-27 -0.003845 0.005200 0.010277 -0.012429 -0.054460
2861 2018-12-28 0.006712 0.046860 0.013868 0.007254 -0.006238

2862 rows × 6 columns

In [47]:
data_matrix = pd.merge(left=data_matrix, 
                       right=stock_data53[['date', 'return']],
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[47]:
date return_x return_y return_x return_y return_x return_y
0 2005-01-05 0.009892 0.021442 0.014065 0.019452 0.011800 0.015061
1 2005-01-06 -0.009505 -0.017335 -0.002797 -0.017668 -0.008837 -0.002994
2 2005-01-07 0.000803 0.008163 0.008368 0.007105 0.000000 0.010440
3 2005-01-10 0.010031 0.035932 0.000000 0.012313 0.049072 0.011800
4 2005-01-11 0.003265 0.001306 0.011050 0.001747 0.005618 -0.002937
... ... ... ... ... ... ... ...
2818 2018-12-24 0.002901 0.001039 0.012024 0.015501 0.006420 0.007089
2819 2018-12-25 -0.006909 -0.003898 0.009171 -0.011201 -0.007005 -0.001964
2820 2018-12-26 -0.005067 -0.010143 -0.020943 -0.030682 -0.005286 -0.008887
2821 2018-12-27 -0.003845 0.005200 0.010277 -0.012429 -0.054460 -0.005171
2822 2018-12-28 0.006712 0.046860 0.013868 0.007254 -0.006238 0.014451

2823 rows × 7 columns

In [48]:
data_matrix = pd.merge(left=data_matrix, 
                       right=stock_data54[['date', 'return']],
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[48]:
date return_x return_y return_x return_y return_x return_y return
0 2005-01-05 0.009892 0.021442 0.014065 0.019452 0.011800 0.015061 0.022877
1 2005-01-06 -0.009505 -0.017335 -0.002797 -0.017668 -0.008837 -0.002994 -0.003236
2 2005-01-07 0.000803 0.008163 0.008368 0.007105 0.000000 0.010440 0.019262
3 2005-01-10 0.010031 0.035932 0.000000 0.012313 0.049072 0.011800 -0.003185
4 2005-01-11 0.003265 0.001306 0.011050 0.001747 0.005618 -0.002937 -0.001596
... ... ... ... ... ... ... ... ...
2790 2018-12-24 0.002901 0.001039 0.012024 0.015501 0.006420 0.007089 0.006807
2791 2018-12-25 -0.006909 -0.003898 0.009171 -0.011201 -0.007005 -0.001964 -0.011189
2792 2018-12-26 -0.005067 -0.010143 -0.020943 -0.030682 -0.005286 -0.008887 -0.021637
2793 2018-12-27 -0.003845 0.005200 0.010277 -0.012429 -0.054460 -0.005171 -0.017825
2794 2018-12-28 0.006712 0.046860 0.013868 0.007254 -0.006238 0.014451 0.000571

2795 rows × 8 columns

In [54]:
data_matrix = pd.merge(left=data_matrix, 
                       right=ret_rf,
                      on='date',
                      how='inner',
                      sort=True)
data_matrix
Out[54]:
date return_x return_y return_x return_y return_x return_y return rfreturn
0 2005-01-05 0.009892 0.021442 0.014065 0.019452 0.011800 0.015061 0.022877 0.000073
1 2005-01-06 -0.009505 -0.017335 -0.002797 -0.017668 -0.008837 -0.002994 -0.003236 0.000071
2 2005-01-07 0.000803 0.008163 0.008368 0.007105 0.000000 0.010440 0.019262 0.000071
3 2005-01-10 0.010031 0.035932 0.000000 0.012313 0.049072 0.011800 -0.003185 0.000071
4 2005-01-11 0.003265 0.001306 0.011050 0.001747 0.005618 -0.002937 -0.001596 0.000071
... ... ... ... ... ... ... ... ... ...
2790 2018-12-24 0.002901 0.001039 0.012024 0.015501 0.006420 0.007089 0.006807 0.000088
2791 2018-12-25 -0.006909 -0.003898 0.009171 -0.011201 -0.007005 -0.001964 -0.011189 0.000090
2792 2018-12-26 -0.005067 -0.010143 -0.020943 -0.030682 -0.005286 -0.008887 -0.021637 0.000091
2793 2018-12-27 -0.003845 0.005200 0.010277 -0.012429 -0.054460 -0.005171 -0.017825 0.000091
2794 2018-12-28 0.006712 0.046860 0.013868 0.007254 -0.006238 0.014451 0.000571 0.000092

2795 rows × 9 columns

In [55]:
data_matrix.columns = ['date', 'ind', 'stk1', 'stk2', 'stk3', 'stk4', 'stk5', 'stk6', 'rf']
data_matrix
Out[55]:
date ind stk1 stk2 stk3 stk4 stk5 stk6 rf
0 2005-01-05 0.009892 0.021442 0.014065 0.019452 0.011800 0.015061 0.022877 0.000073
1 2005-01-06 -0.009505 -0.017335 -0.002797 -0.017668 -0.008837 -0.002994 -0.003236 0.000071
2 2005-01-07 0.000803 0.008163 0.008368 0.007105 0.000000 0.010440 0.019262 0.000071
3 2005-01-10 0.010031 0.035932 0.000000 0.012313 0.049072 0.011800 -0.003185 0.000071
4 2005-01-11 0.003265 0.001306 0.011050 0.001747 0.005618 -0.002937 -0.001596 0.000071
... ... ... ... ... ... ... ... ... ...
2790 2018-12-24 0.002901 0.001039 0.012024 0.015501 0.006420 0.007089 0.006807 0.000088
2791 2018-12-25 -0.006909 -0.003898 0.009171 -0.011201 -0.007005 -0.001964 -0.011189 0.000090
2792 2018-12-26 -0.005067 -0.010143 -0.020943 -0.030682 -0.005286 -0.008887 -0.021637 0.000091
2793 2018-12-27 -0.003845 0.005200 0.010277 -0.012429 -0.054460 -0.005171 -0.017825 0.000091
2794 2018-12-28 0.006712 0.046860 0.013868 0.007254 -0.006238 0.014451 0.000571 0.000092

2795 rows × 9 columns

In [56]:
data_matrix['ind'] = data_matrix['ind'] - data_matrix['rf']
data_matrix['stk1'] = data_matrix['stk1'] - data_matrix['rf']
data_matrix['stk2'] = data_matrix['stk2'] - data_matrix['rf']
data_matrix['stk3'] = data_matrix['stk3'] - data_matrix['rf']
data_matrix['stk4'] = data_matrix['stk4'] - data_matrix['rf']
data_matrix['stk5'] = data_matrix['stk5'] - data_matrix['rf']
data_matrix['stk6'] = data_matrix['stk6'] - data_matrix['rf']
data_matrix
Out[56]:
date ind stk1 stk2 stk3 stk4 stk5 stk6 rf
0 2005-01-05 0.009819 0.021369 0.013992 0.019379 0.011727 0.014988 0.022804 0.000073
1 2005-01-06 -0.009576 -0.017406 -0.002868 -0.017739 -0.008908 -0.003065 -0.003307 0.000071
2 2005-01-07 0.000732 0.008092 0.008297 0.007034 -0.000071 0.010369 0.019191 0.000071
3 2005-01-10 0.009960 0.035861 -0.000071 0.012242 0.049001 0.011729 -0.003256 0.000071
4 2005-01-11 0.003194 0.001235 0.010979 0.001676 0.005547 -0.003008 -0.001667 0.000071
... ... ... ... ... ... ... ... ... ...
2790 2018-12-24 0.002813 0.000951 0.011936 0.015413 0.006332 0.007001 0.006719 0.000088
2791 2018-12-25 -0.006999 -0.003988 0.009081 -0.011291 -0.007095 -0.002054 -0.011279 0.000090
2792 2018-12-26 -0.005158 -0.010234 -0.021034 -0.030773 -0.005377 -0.008978 -0.021728 0.000091
2793 2018-12-27 -0.003936 0.005109 0.010186 -0.012520 -0.054551 -0.005262 -0.017916 0.000091
2794 2018-12-28 0.006620 0.046768 0.013776 0.007162 -0.006330 0.014359 0.000479 0.000092

2795 rows × 9 columns

In [57]:
ret_ind = data_matrix['ind'].values
T = len(ret_ind)
N = 6
mu_market = np.mean(ret_ind)
sigma_market = np.sum((ret_ind - mu_market)**2)/T
ret_stocks = data_matrix[['stk1', 'stk2', 'stk3', 'stk4', 'stk5', 'stk6']].values
In [58]:
x = sm.add_constant(ret_ind)
y = ret_stocks[:, 3]
model = sm.OLS(y,x)
results = model.fit()
print(results.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.236
Model:                            OLS   Adj. R-squared:                  0.235
Method:                 Least Squares   F-statistic:                     861.1
Date:                Thu, 17 Mar 2022   Prob (F-statistic):          3.20e-165
Time:                        20:28:47   Log-Likelihood:                 6349.9
No. Observations:                2795   AIC:                        -1.270e+04
Df Residuals:                    2793   BIC:                        -1.268e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0004      0.000      0.814      0.416      -0.001       0.001
x1             0.8847      0.030     29.345      0.000       0.826       0.944
==============================================================================
Omnibus:                      365.307   Durbin-Watson:                   1.866
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1029.531
Skew:                           0.699   Prob(JB):                    2.76e-224
Kurtosis:                       5.624   Cond. No.                         63.9
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

无限制模型参数估计

In [59]:
x = np.ones((T, 2))
x[:, 1] = ret_ind
y = ret_stocks
xTx = np.dot(np.transpose(x), x)
xTy = np.dot(np.transpose(x), y)
AB_hat = np.dot(np.linalg.inv(xTx), xTy)
ALPHA = AB_hat[0]
print(ALPHA)
BETA = AB_hat[1]
RESD = y - np.dot(x, AB_hat)
COV = np.dot(np.transpose(RESD), RESD)/T
invCOV = np.linalg.inv(COV)
[0.00080167 0.00056553 0.00097406 0.0003848  0.00040081 0.00076225]

限制模型参数估计

In [60]:
xr = np.ones((T, 1))
xr[:, 0] = ret_ind
yr = ret_stocks
xrTxr = np.dot(np.transpose(xr), xr)
xrTyr = np.dot(np.transpose(xr), yr)
ABr_hat = np.dot(np.linalg.inv(xrTxr), xrTyr)
RESDr = yr - np.dot(xr, ABr_hat)
COVr = np.dot(np.transpose(RESDr), RESDr)/T
invCOVr = np.linalg.inv(COVr)
In [61]:
trans_ALPHA = np.ones((len(ALPHA), 1))
trans_ALPHA[:, 0] = ALPHA
SWchi2 = T*(1/(1+mu_market**2/sigma_market))*np.dot(np.dot(ALPHA, invCOV), trans_ALPHA)
SWF = (T-N-1)/N*(1/(1+mu_market**2/sigma_market))*np.dot(np.dot(ALPHA, invCOV), trans_ALPHA)
pvalue_Wchi2 = 1 - chi2.cdf(SWchi2[0], N)
pvalue_WF = 1 - f.cdf(SWF[0], N, T-N-1)
print(pvalue_Wchi2)
print(pvalue_WF)
0.2222152838631707
0.2240747152703364
In [63]:
SLRchi2 = T*(np.log(np.linalg.det(COVr)) - np.log(np.linalg.det(COV)))
pvalue_SLRchi2 = 1 - chi2.cdf(SLRchi2, N)
print(pvalue_SLRchi2)
0.22305244629253163
In [64]:
a = np.zeros((6, 1))
a[:, 0] = np.sum(RESDr, axis=0)
salpha = np.dot(invCOVr, a)
b = np.dot(ret_ind, RESDr)
sbeta = np.zeros((6, 1))
sbeta[:, 0] = np.dot(invCOVr, b)
score = np.concatenate((salpha, sbeta), axis=0)
In [65]:
a = np.concatenate((invCOVr*T, invCOVr*np.sum(ret_ind)), axis=1)
b = np.concatenate((invCOVr*np.sum(ret_ind), invCOVr*np.sum(ret_ind**2)), axis=1)
Minfo = np.concatenate((a, b), axis=0)
SLMchi2 = np.dot(np.dot(np.transpose(score), np.linalg.inv(Minfo)), score)
pvalue_SLMchi2 = 1 - chi2.cdf(SLMchi2[0][0], N)
print(pvalue_SLMchi2)
0.22389055874285257
In [66]:
print('{:>10s}, {:>10s}, {:>10s}, {:>10s}'.format('Wald Test1', 'Wald Test2', 'LR Test', 'LM Test'))
print('{:10.5f}, {:10.5f}, {:10.5f}, {:10.5f}'.format(SWchi2[0], SWF[0], SLRchi2, SLMchi2[0][0]))
print('{:10.5f}, {:10.5f}, {:10.5f}, {:10.5f}'.format(pvalue_Wchi2, pvalue_WF, pvalue_SLRchi2, pvalue_SLMchi2))
Wald Test1, Wald Test2,    LR Test,    LM Test
   8.22302,    1.36707,    8.21095,    8.19890
   0.22222,    0.22407,    0.22305,    0.22389