华东理工大学《金融计算》

中国股市收益率可预测性实证研究

蒋志强

In [2]:
import numpy as np
import pandas as pd
from math import pi
import warnings
warnings.filterwarnings('ignore')

import statsmodels.api as sm
import statsmodels.formula.api as smf
import sklearn.linear_model as sklm
import matplotlib.pyplot as plt
In [3]:
data = pd.read_excel('1EData_PredictorData2019.xlsx', sheet_name='Monthly')
data
Out[3]:
yyyymm Index D12 E12 b/m tbl AAA BAA lty ntis Rfree infl ltr corpr svar csp CRSP_SPvw CRSP_SPvwx PPIG IPG
0 187101 4.440000 0.260000 0.40 NaN NaN NaN NaN NaN NaN 0.004967 NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 187102 4.500000 0.260000 0.40 NaN NaN NaN NaN NaN NaN 0.004525 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 187103 4.610000 0.260000 0.40 NaN NaN NaN NaN NaN NaN 0.004252 NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 187104 4.740000 0.260000 0.40 NaN NaN NaN NaN NaN NaN 0.004643 NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 187105 4.860000 0.260000 0.40 NaN NaN NaN NaN NaN NaN 0.003698 NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1783 201908 2926.459961 56.838763 133.69 0.237917 0.0210 0.0298 0.0387 0.0163 -0.010244 0.001625 -0.000051 0.0797 0.0738 0.004318 NaN -0.016085 -0.018377 -0.747384 0.705045
1784 201909 2976.739990 57.219507 132.90 0.233377 0.0195 0.0303 0.0391 0.0170 -0.010959 0.001575 0.000783 -0.0192 -0.0190 0.000605 NaN 0.018791 0.017272 -0.401606 -0.347551
1785 201910 3037.560059 57.559879 135.09 0.232261 0.0189 0.0301 0.0392 0.0171 -0.013267 0.001375 0.002286 -0.0052 0.0006 0.001510 NaN 0.021621 0.020441 0.100806 -0.406952
1786 201911 3140.979980 57.900251 137.28 0.223938 0.0165 0.0306 0.0394 0.0181 -0.007907 0.001283 -0.000536 -0.0059 0.0014 0.000306 NaN 0.036206 0.033979 0.201410 0.928027
1787 201912 3230.780029 58.240623 139.47 0.220116 0.0154 0.0301 0.0388 0.0186 -0.007306 0.001283 -0.000910 -0.0253 -0.0089 0.000502 NaN 0.029788 0.028136 0.000000 -0.366325

1788 rows × 20 columns

In [4]:
data['DP'] = data['D12'].apply(np.log) - data['Index'].apply(np.log)
data['EP'] = data['E12'].apply(np.log) - data['Index'].apply(np.log)
data['VOL'] = data['CRSP_SPvw'].abs().rolling(window=12).mean()*np.sqrt(pi/6)
data['BILL'] = data['tbl'] - data['tbl'].rolling(window=12).mean()
data['BOND'] = data['lty'] - data['lty'].rolling(window=12).mean()
data['TERM'] = data['lty'] - data['tbl']
data['CREDIT'] = data['AAA'] - data['lty']
data['MA112'] = data['Index'] >= data['Index'].rolling(window=12).mean()
data['MA312']  =data['Index'].rolling(window=3).mean() >= data['Index'].rolling(window=12).mean()
data['MOM6'] = data['Index'] >= data['Index'].shift(periods=6)
data['ExRet'] = data['CRSP_SPvw'] - data['Rfree']
data[['MA112', 'MA312', 'MOM6']] = data[['MA112', 'MA312', 'MOM6']].astype(int)
data
Out[4]:
yyyymm Index D12 E12 b/m tbl AAA BAA lty ntis ... EP VOL BILL BOND TERM CREDIT MA112 MA312 MOM6 ExRet
0 187101 4.440000 0.260000 0.40 NaN NaN NaN NaN NaN NaN ... -2.406945 NaN NaN NaN NaN NaN 0 0 0 NaN
1 187102 4.500000 0.260000 0.40 NaN NaN NaN NaN NaN NaN ... -2.420368 NaN NaN NaN NaN NaN 0 0 0 NaN
2 187103 4.610000 0.260000 0.40 NaN NaN NaN NaN NaN NaN ... -2.444519 NaN NaN NaN NaN NaN 0 0 0 NaN
3 187104 4.740000 0.260000 0.40 NaN NaN NaN NaN NaN NaN ... -2.472328 NaN NaN NaN NaN NaN 0 0 0 NaN
4 187105 4.860000 0.260000 0.40 NaN NaN NaN NaN NaN NaN ... -2.497329 NaN NaN NaN NaN NaN 0 0 0 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1783 201908 2926.459961 56.838763 133.69 0.237917 0.0210 0.0298 0.0387 0.0163 -0.010244 ... -3.086025 0.031396 -0.001725 -0.009958 -0.0047 0.0135 1 1 1 -0.017710
1784 201909 2976.739990 57.219507 132.90 0.233377 0.0195 0.0303 0.0391 0.0170 -0.010959 ... -3.108987 0.032219 -0.003158 -0.007892 -0.0025 0.0133 1 1 1 0.017216
1785 201910 3037.560059 57.559879 135.09 0.232261 0.0189 0.0301 0.0392 0.0171 -0.013267 ... -3.112869 0.029398 -0.003558 -0.006283 -0.0018 0.0130 1 1 1 0.020246
1786 201911 3140.979980 57.900251 137.28 0.223938 0.0165 0.0306 0.0394 0.0181 -0.007907 ... -3.130267 0.030390 -0.005458 -0.004150 0.0016 0.0125 1 1 1 0.034923
1787 201912 3230.780029 58.240623 139.47 0.220116 0.0154 0.0301 0.0388 0.0186 -0.007306 ... -3.142629 0.026720 -0.005900 -0.002833 0.0032 0.0115 1 1 1 0.028505

1788 rows × 31 columns

In [5]:
data = pd.concat([data[['yyyymm', 'CRSP_SPvw', 'Rfree', 'ExRet',
                        'DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG',
                        'MA112', 'MA312', 'MOM6']],
                  data[['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG',
                        'MA112', 'MA312', 'MOM6']].shift(periods=1)], axis=1)
data
Out[5]:
yyyymm CRSP_SPvw Rfree ExRet DP EP VOL BILL BOND TERM ... VOL BILL BOND TERM CREDIT PPIG IPG MA112 MA312 MOM6
0 187101 NaN 0.004967 NaN -2.837728 -2.406945 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 187102 NaN 0.004525 NaN -2.851151 -2.420368 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
2 187103 NaN 0.004252 NaN -2.875302 -2.444519 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
3 187104 NaN 0.004643 NaN -2.903111 -2.472328 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
4 187105 NaN 0.003698 NaN -2.928112 -2.497329 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1783 201908 -0.016085 0.001625 -0.017710 -3.941330 -3.086025 0.031396 -0.001725 -0.009958 -0.0047 ... 0.032412 -0.000908 -0.006742 -0.0011 0.0123 0.199700 -0.175883 1.0 1.0 1.0
1784 201909 0.018791 0.001575 0.017216 -3.951689 -3.108987 0.032219 -0.003158 -0.007892 -0.0025 ... 0.031396 -0.001725 -0.009958 -0.0047 0.0135 -0.747384 0.705045 1.0 1.0 1.0
1785 201910 0.021621 0.001375 0.020246 -3.965984 -3.112869 0.029398 -0.003558 -0.006283 -0.0018 ... 0.032219 -0.003158 -0.007892 -0.0025 0.0133 -0.401606 -0.347551 1.0 1.0 1.0
1786 201911 0.036206 0.001283 0.034923 -3.993568 -3.130267 0.030390 -0.005458 -0.004150 0.0016 ... 0.029398 -0.003558 -0.006283 -0.0018 0.0130 0.100806 -0.406952 1.0 1.0 1.0
1787 201912 0.029788 0.001283 0.028505 -4.015896 -3.142629 0.026720 -0.005900 -0.002833 0.0032 ... 0.030390 -0.005458 -0.004150 0.0016 0.0125 0.201410 0.928027 1.0 1.0 1.0

1788 rows × 28 columns

In [6]:
data.columns = ['yyyymm', 'Ret', 'Rfree', 'ExRet',
                'DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG',
                'MA112', 'MA312', 'MOM6', 'DPL1',
                'EPL1', 'VOLL1', 'BILLL1', 'BONDL1', 'TERML1', 'CREDITL1', 'PPIGL1', 'IPGL1',
                'MA112L1', 'MA312L1', 'MOM6L1']
data
Out[6]:
yyyymm Ret Rfree ExRet DP EP VOL BILL BOND TERM ... VOLL1 BILLL1 BONDL1 TERML1 CREDITL1 PPIGL1 IPGL1 MA112L1 MA312L1 MOM6L1
0 187101 NaN 0.004967 NaN -2.837728 -2.406945 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 187102 NaN 0.004525 NaN -2.851151 -2.420368 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
2 187103 NaN 0.004252 NaN -2.875302 -2.444519 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
3 187104 NaN 0.004643 NaN -2.903111 -2.472328 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
4 187105 NaN 0.003698 NaN -2.928112 -2.497329 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1783 201908 -0.016085 0.001625 -0.017710 -3.941330 -3.086025 0.031396 -0.001725 -0.009958 -0.0047 ... 0.032412 -0.000908 -0.006742 -0.0011 0.0123 0.199700 -0.175883 1.0 1.0 1.0
1784 201909 0.018791 0.001575 0.017216 -3.951689 -3.108987 0.032219 -0.003158 -0.007892 -0.0025 ... 0.031396 -0.001725 -0.009958 -0.0047 0.0135 -0.747384 0.705045 1.0 1.0 1.0
1785 201910 0.021621 0.001375 0.020246 -3.965984 -3.112869 0.029398 -0.003558 -0.006283 -0.0018 ... 0.032219 -0.003158 -0.007892 -0.0025 0.0133 -0.401606 -0.347551 1.0 1.0 1.0
1786 201911 0.036206 0.001283 0.034923 -3.993568 -3.130267 0.030390 -0.005458 -0.004150 0.0016 ... 0.029398 -0.003558 -0.006283 -0.0018 0.0130 0.100806 -0.406952 1.0 1.0 1.0
1787 201912 0.029788 0.001283 0.028505 -4.015896 -3.142629 0.026720 -0.005900 -0.002833 0.0032 ... 0.030390 -0.005458 -0.004150 0.0016 0.0125 0.201410 0.928027 1.0 1.0 1.0

1788 rows × 28 columns

In [7]:
data = data[data['yyyymm'] >= 192701]
data.reset_index(drop=True, inplace=True)
data
Out[7]:
yyyymm Ret Rfree ExRet DP EP VOL BILL BOND TERM ... VOLL1 BILLL1 BONDL1 TERML1 CREDITL1 PPIGL1 IPGL1 MA112L1 MA312L1 MOM6L1
0 192701 -0.002910 0.002692 -0.005602 -2.942374 -2.374773 0.022268 -0.001625 -0.001508 0.0044 ... 0.022200 0.000808 -0.001400 0.0019 0.0114 -0.588235 -0.400104 1.0 1.0 1.0
1 192702 0.045522 0.002742 0.042780 -2.979535 -2.430353 0.023005 0.000192 -0.001700 0.0024 ... 0.022268 -0.001625 -0.001508 0.0044 0.0115 -2.958580 -0.401711 1.0 1.0 1.0
2 192703 0.007324 0.002667 0.004657 -2.976535 -2.445079 0.019967 0.000700 -0.002967 0.0002 ... 0.023005 0.000192 -0.001700 0.0024 0.0120 1.219512 0.806663 1.0 1.0 1.0
3 192704 0.013021 0.002825 0.010196 -2.984225 -2.471309 0.018429 -0.000250 -0.002475 0.0013 ... 0.019967 0.000700 -0.002967 0.0002 0.0131 -0.602410 1.200312 1.0 1.0 1.0
4 192705 0.062353 0.002775 0.059578 -3.025963 -2.531446 0.021368 0.001392 -0.002725 -0.0012 ... 0.018429 -0.000250 -0.002475 0.0013 0.0125 -1.212121 -2.372151 1.0 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1111 201908 -0.016085 0.001625 -0.017710 -3.941330 -3.086025 0.031396 -0.001725 -0.009958 -0.0047 ... 0.032412 -0.000908 -0.006742 -0.0011 0.0123 0.199700 -0.175883 1.0 1.0 1.0
1112 201909 0.018791 0.001575 0.017216 -3.951689 -3.108987 0.032219 -0.003158 -0.007892 -0.0025 ... 0.031396 -0.001725 -0.009958 -0.0047 0.0135 -0.747384 0.705045 1.0 1.0 1.0
1113 201910 0.021621 0.001375 0.020246 -3.965984 -3.112869 0.029398 -0.003558 -0.006283 -0.0018 ... 0.032219 -0.003158 -0.007892 -0.0025 0.0133 -0.401606 -0.347551 1.0 1.0 1.0
1114 201911 0.036206 0.001283 0.034923 -3.993568 -3.130267 0.030390 -0.005458 -0.004150 0.0016 ... 0.029398 -0.003558 -0.006283 -0.0018 0.0130 0.100806 -0.406952 1.0 1.0 1.0
1115 201912 0.029788 0.001283 0.028505 -4.015896 -3.142629 0.026720 -0.005900 -0.002833 0.0032 ... 0.030390 -0.005458 -0.004150 0.0016 0.0125 0.201410 0.928027 1.0 1.0 1.0

1116 rows × 28 columns

In [8]:
data['DP'].plot()
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd9cca22c10>
In [9]:
data['EP'].plot()
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd9cc8b8190>
In [10]:
def myfun_stat_gains(rout, rmean, rreal):
    R2os = 1 - np.sum((rreal-rout)**2)/np.sum((rreal-rmean)**2)
    d = (rreal - rmean)**2 - ((rreal-rout)**2 - (rmean-rout)**2)
    x = sm.add_constant(np.arange(len(d))+1)
    model = sm.OLS(d, x)
    fitres = model.fit()
    MFSEadj = fitres.tvalues[0]
    pvalue_MFSEadj = fitres.pvalues[0]

    if (R2os > 0) & (pvalue_MFSEadj <= 0.01):
        jud = '在1%的显著性水平下有样本外预测能力'
    elif (R2os > 0) & (pvalue_MFSEadj > 0.01) & (pvalue_MFSEadj <= 0.05):
        jud = '在5%的显著性水平下有样本外预测能力'
    elif (R2os > 0) & (pvalue_MFSEadj > 0.05) & (pvalue_MFSEadj <= 0.1):
        jud = '在10%的显著性水平下有样本外预测能力'
    else:
        jud = '无样本外预测能力'
    print('Stat gains: R2os = {:f}, MFSEadj = {:f}, MFSEpvalue = {:f}'.format(R2os, MFSEadj, pvalue_MFSEadj))
    print('Inference: {:s}'.format(jud))

    return R2os, MFSEadj, pvalue_MFSEadj
In [11]:
def myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm = 5):
    omg_out = rout/volt2/gmm
    rp_out = rfree + omg_out*rreal
    Uout = np.mean(rp_out) - 0.5*gmm*np.var(rp_out)
    omg_mean = rmean/volt2/gmm
    rp_mean = rfree + omg_mean*rreal
    Umean = np.mean(rp_mean) - 0.5*gmm*np.var(rp_mean)
    DeltaU = Uout - Umean

    if DeltaU < 10**-6:
        jud = '没有经济意义'
    else:
        jud = '有经济意义'
    print('Econ Gains: Delta U = {:f}, Upred = {:f}, Umean = {:f}'.format(DeltaU, Uout, Umean))
    print('Inference: {:s}'.format(jud))

    return Uout, Umean, DeltaU
In [12]:
# 样本内检验
# 单因子模型:OLS线性拟合
factor = 'DP'
model = smf.ols('ExRet ~ DPL1', data=data[['ExRet', 'DPL1']])
results = model.fit()
rg_con = results.params['Intercept']
rg_con_pvalue = results.pvalues['Intercept']
rg_DP = results.params['DPL1']
rg_DP_pvalue = results.pvalues['DPL1']
if rg_DP_pvalue <= 0.01:
    jud = '在1%的显著性水平下有样本内预测能力'
elif (rg_DP_pvalue > 0.01) & (rg_DP_pvalue <= 0.05):
    jud = '在5%的显著性水平下有样本内预测能力'
elif (rg_DP_pvalue > 0.05) & (rg_DP_pvalue <= 0.1):
    jud = '在10%的显著性水平下有样本内预测能力'
else:
    jud = '无样本内预测能力'
print('In-sample tests for one factor model with OLS:')
print('Predictor: {:s}'.format(factor))
print('Regressing Results: b = {:f}, k = {:f}'.format(rg_con, rg_DP))
print('Regressing Pvalues: p = {:f}, p = {:f}'.format(rg_con_pvalue, rg_DP_pvalue))
print('Inference: {:s}'.format(jud))
In-sample tests for one factor model with OLS:
Predictor: DP
Regressing Results: b = 0.029255, k = 0.006683
Regressing Pvalues: p = 0.014418, p = 0.056033
Inference: 在10%的显著性水平下有样本内预测能力
In [13]:
# 样本外检验
# 单因子模型: OLS线性拟合
factor_out = 'DP'
datafit = data[['yyyymm', 'Ret', 'Rfree', 'ExRet', 'DP', 'DPL1']].copy(deep=True)

n_in = np.sum(datafit['yyyymm'] <= 195612)
n_out = np.sum(datafit['yyyymm'] > 195612)
rout = np.zeros(n_out)
rmean = np.zeros(n_out)
rreal = np.zeros(n_out)
rfree = np.zeros(n_out)
volt2 = np.zeros(n_out)


for i in range(n_out):
    model = smf.ols('ExRet ~ DPL1', data=datafit[['ExRet', 'DPL1']].iloc[:(n_in+i),:])
    results = model.fit()
    b = results.params['Intercept']
    k = results.params['DPL1']
    f = datafit['DP'].iloc[n_in+i-1]
    rreal[i] = datafit['ExRet'].iloc[n_in+i]
    rfree[i] = datafit['Rfree'].iloc[n_in+i]
    rout[i] = k*f+b
    rmean[i] = np.mean(datafit['ExRet'].iloc[:(n_in+i)].values)
    volt2[i] = np.sum(datafit['Ret'].iloc[(n_in+i-12):(n_in+i)].values**2)

print()
print('Out-of-sample tests for one factor model with OLS:')
print('Predictor: {:s}'.format(factor_out))
R2os, MFSEadj, pvalue_MFSEadj = myfun_stat_gains(rout, rmean, rreal)
Uout, Umean, DeltaU = myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm=5)
del datafit
Out-of-sample tests for one factor model with OLS:
Predictor: DP
Stat gains: R2os = -0.005143, MFSEadj = 2.062252, MFSEpvalue = 0.039526
Inference: 无样本外预测能力
Econ Gains: Delta U = -0.000301, Upred = 0.003771, Umean = 0.004072
Inference: 没有经济意义
In [14]:
# 样本外检验
# 多因子模型:OLS线性拟合
factor_out = 'DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6'
datafit = data.copy(deep=True)

n_in = np.sum(datafit['yyyymm'] <= 195612)
n_out = np.sum(datafit['yyyymm'] > 195612)
rout = np.zeros(n_out)
rmean = np.zeros(n_out)
rreal = np.zeros(n_out)
rfree = np.zeros(n_out)
volt2 = np.zeros(n_out)

for i in range(n_out):
    model = smf.ols('ExRet ~ DPL1 + EPL1 + VOLL1 + BILLL1 + BONDL1 + TERML1 + CREDITL1 + '
                    'PPIGL1 + IPGL1 + MA112L1 + MA312L1 + MOM6L1',
                    data=datafit[['ExRet', 'DPL1', 'EPL1', 'VOLL1', 'BILLL1', 'BONDL1', 'TERML1',
                                  'CREDITL1', 'PPIGL1', 'IPGL1', 'MA112L1', 'MA312L1', 'MOM6L1']].iloc[:(n_in+i), :])
    results = model.fit()
    k = results.params.values
    f = datafit[['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG',
                 'IPG', 'MA112', 'MA312', 'MOM6']].iloc[n_in+i-1, :].values
    f = np.concatenate((np.array([1]), f))
    rreal[i] = datafit['ExRet'].iloc[n_in+i]
    rfree[i] = datafit['Rfree'].iloc[n_in+i]
    rout[i] = np.sum(k*f)
    rmean[i] = np.mean(datafit['ExRet'].iloc[:(n_in+i)].values)
    volt2[i] = np.sum(datafit['Ret'].iloc[(n_in+i-12):(n_in+i)].values**2)

print()
print('Out-of-sample tests for multi-factor model with OLS:')
print('Predictor: {:s}'.format(factor_out))
R2os, MFSEadj, pvalue_MFSEadj = myfun_stat_gains(rout, rmean, rreal)
Uout, Umean, DeltaU = myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm=5)
del datafit
Out-of-sample tests for multi-factor model with OLS:
Predictor: DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6
Stat gains: R2os = -0.032711, MFSEadj = 2.848436, MFSEpvalue = 0.004513
Inference: 无样本外预测能力
Econ Gains: Delta U = 0.000494, Upred = 0.004566, Umean = 0.004072
Inference: 有经济意义
In [15]:
# 样本外检验
# 多因子模型:LASSO回归, Ridge回归,ElasticNet回归

factor_out = 'DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6'
factor_list = np.array(['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG', 'MA112', 'MA312', 'MOM6'])

datafit = data.copy(deep=True)

n_in = np.sum(datafit['yyyymm'] <= 195612)
n_out = np.sum(datafit['yyyymm'] > 195612)
rout = np.zeros(n_out)
rmean = np.zeros(n_out)
rreal = np.zeros(n_out)
rfree = np.zeros(n_out)
volt2 = np.zeros(n_out)

# reg = sklm.LassoCV(random_state=0, cv=10, fit_intercept=True, normalize=True, precompute='auto', copy_X=True, n_jobs=-1, max_iter=10**9, tol=10-6)
# reg_lasso = linear_model.LassoLarsCV(cv=10, fit_intercept=True, normalize=True, precompute='auto', copy_X=True, n_jobs=-1, max_iter=10000000)
reg = sklm.RidgeCV(cv=10, fit_intercept=True, normalize=True)
# reg = sklm.ElasticNetCV(random_state=0, cv=10, fit_intercept=True, normalize=True, precompute='auto', copy_X=True, n_jobs=-1, max_iter=10**9, tol=10-6)
for i in range(n_out):
    X = datafit[['DPL1', 'EPL1', 'VOLL1', 'BILLL1', 'BONDL1', 'TERML1',
                 'CREDITL1', 'PPIGL1', 'IPGL1', 'MA112L1', 'MA312L1', 'MOM6L1']].iloc[:(n_in+i), :].values
    y = datafit['ExRet'].iloc[:(n_in+i)].values
    reg.fit(X, y)
    # print(factor_list[np.abs(reg.coef_) != 0])
    k = np.concatenate((np.array([reg.intercept_]), reg.coef_))
    f = datafit[['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG',
                 'IPG', 'MA112', 'MA312', 'MOM6']].iloc[n_in+i-1, :].values
    f = np.concatenate((np.array([1]), f))
    rreal[i] = datafit['ExRet'].iloc[n_in+i]
    rfree[i] = datafit['Rfree'].iloc[n_in+i]
    rout[i] = np.sum(k*f)
    rmean[i] = np.mean(datafit['ExRet'].iloc[:(n_in+i)].values)
    volt2[i] = np.sum(datafit['Ret'].iloc[(n_in+i-12):(n_in+i)].values**2)

print()
print('Out-of-sample tests for multi-factor model with ML method:')
print('Predictor: {:s}'.format(factor_out))
R2os, MFSEadj, pvalue_MFSEadj = myfun_stat_gains(rout, rmean, rreal)
Uout, Umean, DeltaU = myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm=5)
del datafit
Out-of-sample tests for multi-factor model with ML method:
Predictor: DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6
Stat gains: R2os = 0.014761, MFSEadj = 2.583914, MFSEpvalue = 0.009956
Inference: 在1%的显著性水平下有样本外预测能力
Econ Gains: Delta U = 0.000174, Upred = 0.004246, Umean = 0.004072
Inference: 有经济意义