华东理工大学《金融计算》实验课

三因子模型的实证检验

蒋志强

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import f
In [4]:
data_factors = pd.read_csv('Data_FFFactors.csv', encoding='GB2312', usecols=[2, 6, 7, 8])
data_factors.columns = ['date', 'mkt', 'smb', 'hml']
data_factors['date'] = pd.to_datetime(data_factors['date'])
data_factors['yearmonth'] = data_factors['date'].dt.strftime('%Y%m')
data_factors
Out[4]:
date mkt smb hml yearmonth
0 2009-01-23 0.0934 0.0516 -0.0265 200901
1 2009-02-27 0.0461 0.0417 0.0116 200902
2 2009-03-31 0.1397 0.0565 -0.0355 200903
3 2009-04-30 0.0441 0.0071 0.0293 200904
4 2009-05-27 0.0626 0.0429 -0.0040 200905
... ... ... ... ... ...
103 2017-08-31 0.0246 0.0078 -0.0146 201708
104 2017-09-29 -0.0062 -0.0116 -0.0295 201709
105 2017-10-31 0.0110 -0.0609 -0.0310 201710
106 2017-11-30 -0.0259 -0.0371 0.0280 201711
107 2017-12-29 -0.0069 -0.0398 -0.0195 201712

108 rows × 5 columns

In [6]:
data_index = pd.read_csv('Data_Index.csv', encoding='GB2312', usecols=[1, 2, 4])
data_index.columns = ['idxname', 'date', 'return']
data_index['date'] = pd.to_datetime(data_index['date'])
data_index['yearmonth'] = data_index['date'].dt.strftime('%Y%m')
data_index
Out[6]:
idxname date return yearmonth
0 上证指数 1997-01-31 0.0520 199701
1 上证指数 1997-02-28 0.0783 199702
2 上证指数 1997-03-31 0.1868 199703
3 上证指数 1997-04-30 0.1289 199704
4 上证指数 1997-05-30 -0.0779 199705
... ... ... ... ...
1327 上证公用 2017-08-31 -0.0096 201708
1328 上证公用 2017-09-29 -0.0234 201709
1329 上证公用 2017-10-31 0.0056 201710
1330 上证公用 2017-11-30 -0.0238 201711
1331 上证公用 2017-12-29 -0.0246 201712

1332 rows × 4 columns

In [7]:
idxname = np.unique(data_index['idxname'].values)
idxname
Out[7]:
array(['上证信息', '上证公用', '上证医药', '上证可选', '上证工业', '上证指数', '上证材料', '上证消费',
       '上证电信', '上证能源', '上证金融'], dtype=object)
In [9]:
data_xinxi = data_index[data_index['idxname'] == idxname[0]]
data_xinxi
Out[9]:
idxname date return yearmonth
1008 上证信息 2009-01-23 NaN 200901
1009 上证信息 2009-02-27 0.0560 200902
1010 上证信息 2009-03-31 0.1915 200903
1011 上证信息 2009-04-30 0.0440 200904
1012 上证信息 2009-05-27 0.0365 200905
... ... ... ... ...
1111 上证信息 2017-08-31 0.0562 201708
1112 上证信息 2017-09-29 0.0490 201709
1113 上证信息 2017-10-31 0.0017 201710
1114 上证信息 2017-11-30 -0.0257 201711
1115 上证信息 2017-12-29 -0.0154 201712

108 rows × 4 columns

In [11]:
data_gongyong = data_index[data_index['idxname'] == idxname[1]]
data_yiyao = data_index[data_index['idxname'] == idxname[2]]
data_kexuan = data_index[data_index['idxname'] == idxname[3]]
data_gongye = data_index[data_index['idxname'] == idxname[4]]
data_cailiao = data_index[data_index['idxname'] == idxname[6]]
data_xiaofei = data_index[data_index['idxname'] == idxname[7]]
data_dianxin = data_index[data_index['idxname'] == idxname[8]]
data_nengyuan = data_index[data_index['idxname'] == idxname[9]]
data_jinrong = data_index[data_index['idxname'] == idxname[10]]
In [15]:
data_rf = pd.read_csv('Data_RiskFreeReturn.csv', encoding='GB2312', usecols=[0, 3])
data_rf.columns = ['date', 'rfreturn']
data_rf['date'] = pd.to_datetime(data_rf['date'])
data_rf['yearmonth'] = data_rf['date'].dt.strftime('%Y%m')
data_rf
Out[15]:
date rfreturn yearmonth
0 2009-01-01 0.001256 200901
1 2009-02-01 0.001088 200902
2 2009-03-01 0.001041 200903
3 2009-04-01 0.001013 200904
4 2009-05-01 0.001010 200905
... ... ... ...
103 2017-08-01 0.003602 201708
104 2017-09-01 0.003650 201709
105 2017-10-01 0.003641 201710
106 2017-11-01 0.003798 201711
107 2017-12-01 0.004030 201712

108 rows × 3 columns

In [16]:
data_matrix = pd.merge(left=data_factors[['yearmonth', 'date', 'mkt', 'smb', 'hml']],
                      right=data_xinxi[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix
Out[16]:
yearmonth date mkt smb hml return
0 200901 2009-01-23 0.0934 0.0516 -0.0265 NaN
1 200902 2009-02-27 0.0461 0.0417 0.0116 0.0560
2 200903 2009-03-31 0.1397 0.0565 -0.0355 0.1915
3 200904 2009-04-30 0.0441 0.0071 0.0293 0.0440
4 200905 2009-05-27 0.0626 0.0429 -0.0040 0.0365
... ... ... ... ... ... ...
103 201708 2017-08-31 0.0246 0.0078 -0.0146 0.0562
104 201709 2017-09-29 -0.0062 -0.0116 -0.0295 0.0490
105 201710 2017-10-31 0.0110 -0.0609 -0.0310 0.0017
106 201711 2017-11-30 -0.0259 -0.0371 0.0280 -0.0257
107 201712 2017-12-29 -0.0069 -0.0398 -0.0195 -0.0154

108 rows × 6 columns

In [17]:
data_matrix = pd.merge(left=data_matrix,
                      right=data_gongyong[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_yiyao[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_kexuan[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_gongye[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_cailiao[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_xiaofei[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_dianxin[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_nengyuan[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_jinrong[['yearmonth', 'return']],
                      on=['yearmonth'],
                      how='inner')
data_matrix = pd.merge(left=data_matrix,
                      right=data_rf[['yearmonth', 'rfreturn']],
                      on=['yearmonth'],
                      how='inner')
data_matrix
/home/matlab/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:12: FutureWarning: Passing 'suffixes' which cause duplicate columns {'return_x'} in the result is deprecated and will raise a MergeError in a future version.
  if sys.path[0] == '':
/home/matlab/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:20: FutureWarning: Passing 'suffixes' which cause duplicate columns {'return_x'} in the result is deprecated and will raise a MergeError in a future version.
/home/matlab/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:28: FutureWarning: Passing 'suffixes' which cause duplicate columns {'return_x'} in the result is deprecated and will raise a MergeError in a future version.
/home/matlab/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:36: FutureWarning: Passing 'suffixes' which cause duplicate columns {'return_x'} in the result is deprecated and will raise a MergeError in a future version.
Out[17]:
yearmonth date mkt smb hml return_x return_y return_x return_y return_x return_y return_x return_y return_x return_y rfreturn
0 200901 2009-01-23 0.0934 0.0516 -0.0265 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.001256
1 200902 2009-02-27 0.0461 0.0417 0.0116 0.0560 0.0609 0.0379 0.0954 0.0330 0.0947 0.1007 0.0331 -0.0270 0.0347 0.001088
2 200903 2009-03-31 0.1397 0.0565 -0.0355 0.1915 0.1698 0.1050 0.1934 0.1909 0.2219 0.0892 0.1568 0.1794 0.1739 0.001041
3 200904 2009-04-30 0.0441 0.0071 0.0293 0.0440 0.0132 0.0639 0.1022 0.0329 0.0102 0.0642 0.1561 0.1426 0.0281 0.001013
4 200905 2009-05-27 0.0626 0.0429 -0.0040 0.0365 0.0475 -0.0395 0.0552 0.0207 0.0456 -0.0026 -0.0582 0.1239 0.0731 0.001010
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
103 201708 2017-08-31 0.0246 0.0078 -0.0146 0.0562 -0.0096 0.0172 -0.0027 -0.0030 0.0432 0.0310 0.0440 0.0097 0.0333 0.003602
104 201709 2017-09-29 -0.0062 -0.0116 -0.0295 0.0490 -0.0234 0.0249 0.0307 -0.0219 -0.0042 0.0816 0.1165 -0.0055 -0.0230 0.003650
105 201710 2017-10-31 0.0110 -0.0609 -0.0310 0.0017 0.0056 0.0830 0.0258 0.0392 -0.0340 0.0789 -0.0105 -0.0357 0.0329 0.003641
106 201711 2017-11-30 -0.0259 -0.0371 0.0280 -0.0257 -0.0238 -0.0380 -0.0452 -0.0228 0.0178 -0.0435 0.0152 0.0209 0.0355 0.003798
107 201712 2017-12-29 -0.0069 -0.0398 -0.0195 -0.0154 -0.0246 0.0426 0.0226 -0.0029 0.0010 0.0797 -0.0434 -0.0003 -0.0167 0.004030

108 rows × 16 columns

In [19]:
data_matrix.columns = ['yearmonth', 'date', 'mkt', 'smb', 'hml', 'xinxi', 'gongyong', 'yiyao', 'kexuan',
                       'gongye', 'cailiao', 'xiaofei', 'dianxin', 'nengyuan', 'jinrong', 'rfreturn']
data_matrix.dropna(inplace=True)
data_matrix.sort_values(by='date', inplace=True)
data_matrix
Out[19]:
yearmonth date mkt smb hml xinxi gongyong yiyao kexuan gongye cailiao xiaofei dianxin nengyuan jinrong rfreturn
1 200902 2009-02-27 0.0461 0.0417 0.0116 0.0560 0.0609 0.0379 0.0954 0.0330 0.0947 0.1007 0.0331 -0.0270 0.0347 0.001088
2 200903 2009-03-31 0.1397 0.0565 -0.0355 0.1915 0.1698 0.1050 0.1934 0.1909 0.2219 0.0892 0.1568 0.1794 0.1739 0.001041
3 200904 2009-04-30 0.0441 0.0071 0.0293 0.0440 0.0132 0.0639 0.1022 0.0329 0.0102 0.0642 0.1561 0.1426 0.0281 0.001013
4 200905 2009-05-27 0.0626 0.0429 -0.0040 0.0365 0.0475 -0.0395 0.0552 0.0207 0.0456 -0.0026 -0.0582 0.1239 0.0731 0.001010
5 200906 2009-06-30 0.1331 -0.0208 -0.0079 0.0041 0.0220 0.0775 0.0434 0.0612 0.1364 0.0918 0.0841 0.0935 0.2472 0.001043
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
103 201708 2017-08-31 0.0246 0.0078 -0.0146 0.0562 -0.0096 0.0172 -0.0027 -0.0030 0.0432 0.0310 0.0440 0.0097 0.0333 0.003602
104 201709 2017-09-29 -0.0062 -0.0116 -0.0295 0.0490 -0.0234 0.0249 0.0307 -0.0219 -0.0042 0.0816 0.1165 -0.0055 -0.0230 0.003650
105 201710 2017-10-31 0.0110 -0.0609 -0.0310 0.0017 0.0056 0.0830 0.0258 0.0392 -0.0340 0.0789 -0.0105 -0.0357 0.0329 0.003641
106 201711 2017-11-30 -0.0259 -0.0371 0.0280 -0.0257 -0.0238 -0.0380 -0.0452 -0.0228 0.0178 -0.0435 0.0152 0.0209 0.0355 0.003798
107 201712 2017-12-29 -0.0069 -0.0398 -0.0195 -0.0154 -0.0246 0.0426 0.0226 -0.0029 0.0010 0.0797 -0.0434 -0.0003 -0.0167 0.004030

107 rows × 16 columns

In [20]:
x = data_matrix.loc[:, ['mkt', 'smb', 'hml']].values
ret_rf = data_matrix.loc[:, ['rfreturn']].values
ret_stock = data_matrix.loc[:, ['nengyuan']].values

# 单资产检验
X = sm.add_constant(x)
Y = ret_stock - ret_rf
model = sm.OLS(Y, X)
results = model.fit()
print(results.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.793
Model:                            OLS   Adj. R-squared:                  0.787
Method:                 Least Squares   F-statistic:                     131.4
Date:                Thu, 31 Mar 2022   Prob (F-statistic):           4.55e-35
Time:                        18:29:41   Log-Likelihood:                 191.69
No. Observations:                 107   AIC:                            -375.4
Df Residuals:                     103   BIC:                            -364.7
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0068      0.004     -1.637      0.105      -0.015       0.001
x1             1.0711      0.058     18.360      0.000       0.955       1.187
x2            -0.0370      0.128     -0.288      0.774      -0.292       0.218
x3             0.2413      0.129      1.872      0.064      -0.014       0.497
==============================================================================
Omnibus:                       14.893   Durbin-Watson:                   2.033
Prob(Omnibus):                  0.001   Jarque-Bera (JB):               22.129
Skew:                           0.653   Prob(JB):                     1.57e-05
Kurtosis:                       4.805   Cond. No.                         41.6
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [26]:
# 多资产检验
T = len(Y)
N = 10
K = 3
y = data_matrix.iloc[:, 5:15].values - data_matrix.loc[:, ['rfreturn']].values
# x = sm.add_constant(x)
xTx = np.dot(np.transpose(x), x)
xTy = np.dot(np.transpose(x), y)
AB_hat = np.dot(np.linalg.inv(xTx), xTy)
ALPHA = AB_hat[0]
ALPHA
Out[26]:
array([ 0.00220998, -0.00475559,  0.00869214,  0.0006012 , -0.00248682,
       -0.00186367,  0.00665327,  0.00108446, -0.00683547,  0.00381172])
In [29]:
RESD = y - np.dot(x, AB_hat)
COV = np.dot(np.transpose(RESD), RESD)/T
invCOV = np.linalg.inv(COV)

fs = x[:, [1, 2, 3]]
muhat = np.mean(fs, axis=0).reshape((3, 1))
fs = fs - np.mean(fs, axis=0)
omegahat = np.dot(np.transpose(fs), fs)/T
invOMG = np.linalg.inv(omegahat)

xxx = np.dot(np.dot(np.transpose(muhat), invOMG), muhat)
yyy = np.dot(np.dot(ALPHA, invCOV), np.transpose(ALPHA))
GRS = (T-N-K)/N*(1/(1+xxx))*yyy
pvalue = 1 - f.cdf(GRS[0][0], N, T-N-K)

print('三因子模型的多资产检验结果')
print('{:>7s},{:>7s},{:>7s},{:>7s},{:>7s},{:>7s},{:>7s},{:>7s},{:>7s}'.format('alpha1', 'alpha2', 'alpha3', 'alpha4', 'alpha5', 'alpha6', 'alpha7', 'GRS', 'pvalue'))
print('{:7.4f},{:7.4f},{:7.4f},{:7.4f},{:7.4f},{:7.4f},{:7.4f},{:7.4f},{:7.4f}'.format(ALPHA[0], ALPHA[1], ALPHA[2], ALPHA[3], ALPHA[4], ALPHA[5], ALPHA[6], GRS[0][0], pvalue))
三因子模型的多资产检验结果
 alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7,    GRS, pvalue
 0.0022,-0.0048, 0.0087, 0.0006,-0.0025,-0.0019, 0.0067, 1.4182, 0.1843