华东理工大学《金融计算》¶

中国股市收益率可预测性实证研究¶

蒋志强¶

In [2]:

import numpy as np
import pandas as pd
from math import pi
import warnings
warnings.filterwarnings('ignore')

import statsmodels.api as sm
import statsmodels.formula.api as smf
import sklearn.linear_model as sklm
import matplotlib.pyplot as plt

In [3]:

data = pd.read_excel('1EData_PredictorData2019.xlsx', sheet_name='Monthly')
data

Out[3]:

	yyyymm	Index	D12	E12	b/m	tbl	AAA	BAA	lty	ntis	Rfree	infl	ltr	corpr	svar	csp	CRSP_SPvw	CRSP_SPvwx	PPIG	IPG
0	187101	4.440000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	0.004967	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	187102	4.500000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	0.004525	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	187103	4.610000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	0.004252	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	187104	4.740000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	0.004643	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	187105	4.860000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	0.003698	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1783	201908	2926.459961	56.838763	133.69	0.237917	0.0210	0.0298	0.0387	0.0163	-0.010244	0.001625	-0.000051	0.0797	0.0738	0.004318	NaN	-0.016085	-0.018377	-0.747384	0.705045
1784	201909	2976.739990	57.219507	132.90	0.233377	0.0195	0.0303	0.0391	0.0170	-0.010959	0.001575	0.000783	-0.0192	-0.0190	0.000605	NaN	0.018791	0.017272	-0.401606	-0.347551
1785	201910	3037.560059	57.559879	135.09	0.232261	0.0189	0.0301	0.0392	0.0171	-0.013267	0.001375	0.002286	-0.0052	0.0006	0.001510	NaN	0.021621	0.020441	0.100806	-0.406952
1786	201911	3140.979980	57.900251	137.28	0.223938	0.0165	0.0306	0.0394	0.0181	-0.007907	0.001283	-0.000536	-0.0059	0.0014	0.000306	NaN	0.036206	0.033979	0.201410	0.928027
1787	201912	3230.780029	58.240623	139.47	0.220116	0.0154	0.0301	0.0388	0.0186	-0.007306	0.001283	-0.000910	-0.0253	-0.0089	0.000502	NaN	0.029788	0.028136	0.000000	-0.366325

1788 rows × 20 columns

In [4]:

data['DP'] = data['D12'].apply(np.log) - data['Index'].apply(np.log)
data['EP'] = data['E12'].apply(np.log) - data['Index'].apply(np.log)
data['VOL'] = data['CRSP_SPvw'].abs().rolling(window=12).mean()*np.sqrt(pi/6)
data['BILL'] = data['tbl'] - data['tbl'].rolling(window=12).mean()
data['BOND'] = data['lty'] - data['lty'].rolling(window=12).mean()
data['TERM'] = data['lty'] - data['tbl']
data['CREDIT'] = data['AAA'] - data['lty']
data['MA112'] = data['Index'] >= data['Index'].rolling(window=12).mean()
data['MA312']  =data['Index'].rolling(window=3).mean() >= data['Index'].rolling(window=12).mean()
data['MOM6'] = data['Index'] >= data['Index'].shift(periods=6)
data['ExRet'] = data['CRSP_SPvw'] - data['Rfree']
data[['MA112', 'MA312', 'MOM6']] = data[['MA112', 'MA312', 'MOM6']].astype(int)
data

Out[4]:

	yyyymm	Index	D12	E12	b/m	tbl	AAA	BAA	lty	ntis	...	EP	VOL	BILL	BOND	TERM	CREDIT	MA112	MA312	MOM6	ExRet
0	187101	4.440000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	...	-2.406945	NaN	NaN	NaN	NaN	NaN	0	0	0	NaN
1	187102	4.500000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	...	-2.420368	NaN	NaN	NaN	NaN	NaN	0	0	0	NaN
2	187103	4.610000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	...	-2.444519	NaN	NaN	NaN	NaN	NaN	0	0	0	NaN
3	187104	4.740000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	...	-2.472328	NaN	NaN	NaN	NaN	NaN	0	0	0	NaN
4	187105	4.860000	0.260000	0.40	NaN	NaN	NaN	NaN	NaN	NaN	...	-2.497329	NaN	NaN	NaN	NaN	NaN	0	0	0	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1783	201908	2926.459961	56.838763	133.69	0.237917	0.0210	0.0298	0.0387	0.0163	-0.010244	...	-3.086025	0.031396	-0.001725	-0.009958	-0.0047	0.0135	1	1	1	-0.017710
1784	201909	2976.739990	57.219507	132.90	0.233377	0.0195	0.0303	0.0391	0.0170	-0.010959	...	-3.108987	0.032219	-0.003158	-0.007892	-0.0025	0.0133	1	1	1	0.017216
1785	201910	3037.560059	57.559879	135.09	0.232261	0.0189	0.0301	0.0392	0.0171	-0.013267	...	-3.112869	0.029398	-0.003558	-0.006283	-0.0018	0.0130	1	1	1	0.020246
1786	201911	3140.979980	57.900251	137.28	0.223938	0.0165	0.0306	0.0394	0.0181	-0.007907	...	-3.130267	0.030390	-0.005458	-0.004150	0.0016	0.0125	1	1	1	0.034923
1787	201912	3230.780029	58.240623	139.47	0.220116	0.0154	0.0301	0.0388	0.0186	-0.007306	...	-3.142629	0.026720	-0.005900	-0.002833	0.0032	0.0115	1	1	1	0.028505

1788 rows × 31 columns

In [5]:

data = pd.concat([data[['yyyymm', 'CRSP_SPvw', 'Rfree', 'ExRet',
                        'DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG',
                        'MA112', 'MA312', 'MOM6']],
                  data[['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG',
                        'MA112', 'MA312', 'MOM6']].shift(periods=1)], axis=1)
data

Out[5]:

	yyyymm	CRSP_SPvw	Rfree	ExRet	DP	EP	VOL	BILL	BOND	TERM	...	VOL	BILL	BOND	TERM	CREDIT	PPIG	IPG	MA112	MA312	MOM6
0	187101	NaN	0.004967	NaN	-2.837728	-2.406945	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	187102	NaN	0.004525	NaN	-2.851151	-2.420368	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
2	187103	NaN	0.004252	NaN	-2.875302	-2.444519	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
3	187104	NaN	0.004643	NaN	-2.903111	-2.472328	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
4	187105	NaN	0.003698	NaN	-2.928112	-2.497329	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1783	201908	-0.016085	0.001625	-0.017710	-3.941330	-3.086025	0.031396	-0.001725	-0.009958	-0.0047	...	0.032412	-0.000908	-0.006742	-0.0011	0.0123	0.199700	-0.175883	1.0	1.0	1.0
1784	201909	0.018791	0.001575	0.017216	-3.951689	-3.108987	0.032219	-0.003158	-0.007892	-0.0025	...	0.031396	-0.001725	-0.009958	-0.0047	0.0135	-0.747384	0.705045	1.0	1.0	1.0
1785	201910	0.021621	0.001375	0.020246	-3.965984	-3.112869	0.029398	-0.003558	-0.006283	-0.0018	...	0.032219	-0.003158	-0.007892	-0.0025	0.0133	-0.401606	-0.347551	1.0	1.0	1.0
1786	201911	0.036206	0.001283	0.034923	-3.993568	-3.130267	0.030390	-0.005458	-0.004150	0.0016	...	0.029398	-0.003558	-0.006283	-0.0018	0.0130	0.100806	-0.406952	1.0	1.0	1.0
1787	201912	0.029788	0.001283	0.028505	-4.015896	-3.142629	0.026720	-0.005900	-0.002833	0.0032	...	0.030390	-0.005458	-0.004150	0.0016	0.0125	0.201410	0.928027	1.0	1.0	1.0

1788 rows × 28 columns

In [6]:

data.columns = ['yyyymm', 'Ret', 'Rfree', 'ExRet',
                'DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG',
                'MA112', 'MA312', 'MOM6', 'DPL1',
                'EPL1', 'VOLL1', 'BILLL1', 'BONDL1', 'TERML1', 'CREDITL1', 'PPIGL1', 'IPGL1',
                'MA112L1', 'MA312L1', 'MOM6L1']
data

Out[6]:

	yyyymm	Ret	Rfree	ExRet	DP	EP	VOL	BILL	BOND	TERM	...	VOLL1	BILLL1	BONDL1	TERML1	CREDITL1	PPIGL1	IPGL1	MA112L1	MA312L1	MOM6L1
0	187101	NaN	0.004967	NaN	-2.837728	-2.406945	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	187102	NaN	0.004525	NaN	-2.851151	-2.420368	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
2	187103	NaN	0.004252	NaN	-2.875302	-2.444519	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
3	187104	NaN	0.004643	NaN	-2.903111	-2.472328	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
4	187105	NaN	0.003698	NaN	-2.928112	-2.497329	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1783	201908	-0.016085	0.001625	-0.017710	-3.941330	-3.086025	0.031396	-0.001725	-0.009958	-0.0047	...	0.032412	-0.000908	-0.006742	-0.0011	0.0123	0.199700	-0.175883	1.0	1.0	1.0
1784	201909	0.018791	0.001575	0.017216	-3.951689	-3.108987	0.032219	-0.003158	-0.007892	-0.0025	...	0.031396	-0.001725	-0.009958	-0.0047	0.0135	-0.747384	0.705045	1.0	1.0	1.0
1785	201910	0.021621	0.001375	0.020246	-3.965984	-3.112869	0.029398	-0.003558	-0.006283	-0.0018	...	0.032219	-0.003158	-0.007892	-0.0025	0.0133	-0.401606	-0.347551	1.0	1.0	1.0
1786	201911	0.036206	0.001283	0.034923	-3.993568	-3.130267	0.030390	-0.005458	-0.004150	0.0016	...	0.029398	-0.003558	-0.006283	-0.0018	0.0130	0.100806	-0.406952	1.0	1.0	1.0
1787	201912	0.029788	0.001283	0.028505	-4.015896	-3.142629	0.026720	-0.005900	-0.002833	0.0032	...	0.030390	-0.005458	-0.004150	0.0016	0.0125	0.201410	0.928027	1.0	1.0	1.0

1788 rows × 28 columns

In [7]:

data = data[data['yyyymm'] >= 192701]
data.reset_index(drop=True, inplace=True)
data

Out[7]:

	yyyymm	Ret	Rfree	ExRet	DP	EP	VOL	BILL	BOND	TERM	...	VOLL1	BILLL1	BONDL1	TERML1	CREDITL1	PPIGL1	IPGL1	MA112L1	MA312L1	MOM6L1
0	192701	-0.002910	0.002692	-0.005602	-2.942374	-2.374773	0.022268	-0.001625	-0.001508	0.0044	...	0.022200	0.000808	-0.001400	0.0019	0.0114	-0.588235	-0.400104	1.0	1.0	1.0
1	192702	0.045522	0.002742	0.042780	-2.979535	-2.430353	0.023005	0.000192	-0.001700	0.0024	...	0.022268	-0.001625	-0.001508	0.0044	0.0115	-2.958580	-0.401711	1.0	1.0	1.0
2	192703	0.007324	0.002667	0.004657	-2.976535	-2.445079	0.019967	0.000700	-0.002967	0.0002	...	0.023005	0.000192	-0.001700	0.0024	0.0120	1.219512	0.806663	1.0	1.0	1.0
3	192704	0.013021	0.002825	0.010196	-2.984225	-2.471309	0.018429	-0.000250	-0.002475	0.0013	...	0.019967	0.000700	-0.002967	0.0002	0.0131	-0.602410	1.200312	1.0	1.0	1.0
4	192705	0.062353	0.002775	0.059578	-3.025963	-2.531446	0.021368	0.001392	-0.002725	-0.0012	...	0.018429	-0.000250	-0.002475	0.0013	0.0125	-1.212121	-2.372151	1.0	1.0	1.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1111	201908	-0.016085	0.001625	-0.017710	-3.941330	-3.086025	0.031396	-0.001725	-0.009958	-0.0047	...	0.032412	-0.000908	-0.006742	-0.0011	0.0123	0.199700	-0.175883	1.0	1.0	1.0
1112	201909	0.018791	0.001575	0.017216	-3.951689	-3.108987	0.032219	-0.003158	-0.007892	-0.0025	...	0.031396	-0.001725	-0.009958	-0.0047	0.0135	-0.747384	0.705045	1.0	1.0	1.0
1113	201910	0.021621	0.001375	0.020246	-3.965984	-3.112869	0.029398	-0.003558	-0.006283	-0.0018	...	0.032219	-0.003158	-0.007892	-0.0025	0.0133	-0.401606	-0.347551	1.0	1.0	1.0
1114	201911	0.036206	0.001283	0.034923	-3.993568	-3.130267	0.030390	-0.005458	-0.004150	0.0016	...	0.029398	-0.003558	-0.006283	-0.0018	0.0130	0.100806	-0.406952	1.0	1.0	1.0
1115	201912	0.029788	0.001283	0.028505	-4.015896	-3.142629	0.026720	-0.005900	-0.002833	0.0032	...	0.030390	-0.005458	-0.004150	0.0016	0.0125	0.201410	0.928027	1.0	1.0	1.0

1116 rows × 28 columns

In [8]:

data['DP'].plot()

Out[8]:

<matplotlib.axes._subplots.AxesSubplot at 0x7fd9cca22c10>

In [9]:

data['EP'].plot()

Out[9]:

<matplotlib.axes._subplots.AxesSubplot at 0x7fd9cc8b8190>

In [10]:

def myfun_stat_gains(rout, rmean, rreal):
    R2os = 1 - np.sum((rreal-rout)**2)/np.sum((rreal-rmean)**2)
    d = (rreal - rmean)**2 - ((rreal-rout)**2 - (rmean-rout)**2)
    x = sm.add_constant(np.arange(len(d))+1)
    model = sm.OLS(d, x)
    fitres = model.fit()
    MFSEadj = fitres.tvalues[0]
    pvalue_MFSEadj = fitres.pvalues[0]

    if (R2os > 0) & (pvalue_MFSEadj <= 0.01):
        jud = '在1%的显著性水平下有样本外预测能力'
    elif (R2os > 0) & (pvalue_MFSEadj > 0.01) & (pvalue_MFSEadj <= 0.05):
        jud = '在5%的显著性水平下有样本外预测能力'
    elif (R2os > 0) & (pvalue_MFSEadj > 0.05) & (pvalue_MFSEadj <= 0.1):
        jud = '在10%的显著性水平下有样本外预测能力'
    else:
        jud = '无样本外预测能力'
    print('Stat gains: R2os = {:f}, MFSEadj = {:f}, MFSEpvalue = {:f}'.format(R2os, MFSEadj, pvalue_MFSEadj))
    print('Inference: {:s}'.format(jud))

    return R2os, MFSEadj, pvalue_MFSEadj

In [11]:

def myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm = 5):
    omg_out = rout/volt2/gmm
    rp_out = rfree + omg_out*rreal
    Uout = np.mean(rp_out) - 0.5*gmm*np.var(rp_out)
    omg_mean = rmean/volt2/gmm
    rp_mean = rfree + omg_mean*rreal
    Umean = np.mean(rp_mean) - 0.5*gmm*np.var(rp_mean)
    DeltaU = Uout - Umean

    if DeltaU < 10**-6:
        jud = '没有经济意义'
    else:
        jud = '有经济意义'
    print('Econ Gains: Delta U = {:f}, Upred = {:f}, Umean = {:f}'.format(DeltaU, Uout, Umean))
    print('Inference: {:s}'.format(jud))

    return Uout, Umean, DeltaU

In [12]:

# 样本内检验
# 单因子模型：OLS线性拟合
factor = 'DP'
model = smf.ols('ExRet ~ DPL1', data=data[['ExRet', 'DPL1']])
results = model.fit()
rg_con = results.params['Intercept']
rg_con_pvalue = results.pvalues['Intercept']
rg_DP = results.params['DPL1']
rg_DP_pvalue = results.pvalues['DPL1']
if rg_DP_pvalue <= 0.01:
    jud = '在1%的显著性水平下有样本内预测能力'
elif (rg_DP_pvalue > 0.01) & (rg_DP_pvalue <= 0.05):
    jud = '在5%的显著性水平下有样本内预测能力'
elif (rg_DP_pvalue > 0.05) & (rg_DP_pvalue <= 0.1):
    jud = '在10%的显著性水平下有样本内预测能力'
else:
    jud = '无样本内预测能力'
print('In-sample tests for one factor model with OLS:')
print('Predictor: {:s}'.format(factor))
print('Regressing Results: b = {:f}, k = {:f}'.format(rg_con, rg_DP))
print('Regressing Pvalues: p = {:f}, p = {:f}'.format(rg_con_pvalue, rg_DP_pvalue))
print('Inference: {:s}'.format(jud))

In-sample tests for one factor model with OLS:
Predictor: DP
Regressing Results: b = 0.029255, k = 0.006683
Regressing Pvalues: p = 0.014418, p = 0.056033
Inference: 在10%的显著性水平下有样本内预测能力

In [13]:

# 样本外检验
# 单因子模型: OLS线性拟合
factor_out = 'DP'
datafit = data[['yyyymm', 'Ret', 'Rfree', 'ExRet', 'DP', 'DPL1']].copy(deep=True)

n_in = np.sum(datafit['yyyymm'] <= 195612)
n_out = np.sum(datafit['yyyymm'] > 195612)
rout = np.zeros(n_out)
rmean = np.zeros(n_out)
rreal = np.zeros(n_out)
rfree = np.zeros(n_out)
volt2 = np.zeros(n_out)


for i in range(n_out):
    model = smf.ols('ExRet ~ DPL1', data=datafit[['ExRet', 'DPL1']].iloc[:(n_in+i),:])
    results = model.fit()
    b = results.params['Intercept']
    k = results.params['DPL1']
    f = datafit['DP'].iloc[n_in+i-1]
    rreal[i] = datafit['ExRet'].iloc[n_in+i]
    rfree[i] = datafit['Rfree'].iloc[n_in+i]
    rout[i] = k*f+b
    rmean[i] = np.mean(datafit['ExRet'].iloc[:(n_in+i)].values)
    volt2[i] = np.sum(datafit['Ret'].iloc[(n_in+i-12):(n_in+i)].values**2)

print()
print('Out-of-sample tests for one factor model with OLS:')
print('Predictor: {:s}'.format(factor_out))
R2os, MFSEadj, pvalue_MFSEadj = myfun_stat_gains(rout, rmean, rreal)
Uout, Umean, DeltaU = myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm=5)
del datafit

Out-of-sample tests for one factor model with OLS:
Predictor: DP
Stat gains: R2os = -0.005143, MFSEadj = 2.062252, MFSEpvalue = 0.039526
Inference: 无样本外预测能力
Econ Gains: Delta U = -0.000301, Upred = 0.003771, Umean = 0.004072
Inference: 没有经济意义

In [14]:

# 样本外检验
# 多因子模型：OLS线性拟合
factor_out = 'DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6'
datafit = data.copy(deep=True)

n_in = np.sum(datafit['yyyymm'] <= 195612)
n_out = np.sum(datafit['yyyymm'] > 195612)
rout = np.zeros(n_out)
rmean = np.zeros(n_out)
rreal = np.zeros(n_out)
rfree = np.zeros(n_out)
volt2 = np.zeros(n_out)

for i in range(n_out):
    model = smf.ols('ExRet ~ DPL1 + EPL1 + VOLL1 + BILLL1 + BONDL1 + TERML1 + CREDITL1 + '
                    'PPIGL1 + IPGL1 + MA112L1 + MA312L1 + MOM6L1',
                    data=datafit[['ExRet', 'DPL1', 'EPL1', 'VOLL1', 'BILLL1', 'BONDL1', 'TERML1',
                                  'CREDITL1', 'PPIGL1', 'IPGL1', 'MA112L1', 'MA312L1', 'MOM6L1']].iloc[:(n_in+i), :])
    results = model.fit()
    k = results.params.values
    f = datafit[['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG',
                 'IPG', 'MA112', 'MA312', 'MOM6']].iloc[n_in+i-1, :].values
    f = np.concatenate((np.array([1]), f))
    rreal[i] = datafit['ExRet'].iloc[n_in+i]
    rfree[i] = datafit['Rfree'].iloc[n_in+i]
    rout[i] = np.sum(k*f)
    rmean[i] = np.mean(datafit['ExRet'].iloc[:(n_in+i)].values)
    volt2[i] = np.sum(datafit['Ret'].iloc[(n_in+i-12):(n_in+i)].values**2)

print()
print('Out-of-sample tests for multi-factor model with OLS:')
print('Predictor: {:s}'.format(factor_out))
R2os, MFSEadj, pvalue_MFSEadj = myfun_stat_gains(rout, rmean, rreal)
Uout, Umean, DeltaU = myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm=5)
del datafit

Out-of-sample tests for multi-factor model with OLS:
Predictor: DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6
Stat gains: R2os = -0.032711, MFSEadj = 2.848436, MFSEpvalue = 0.004513
Inference: 无样本外预测能力
Econ Gains: Delta U = 0.000494, Upred = 0.004566, Umean = 0.004072
Inference: 有经济意义

In [15]:

# 样本外检验
# 多因子模型：LASSO回归, Ridge回归，ElasticNet回归

factor_out = 'DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6'
factor_list = np.array(['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG', 'IPG', 'MA112', 'MA312', 'MOM6'])

datafit = data.copy(deep=True)

n_in = np.sum(datafit['yyyymm'] <= 195612)
n_out = np.sum(datafit['yyyymm'] > 195612)
rout = np.zeros(n_out)
rmean = np.zeros(n_out)
rreal = np.zeros(n_out)
rfree = np.zeros(n_out)
volt2 = np.zeros(n_out)

# reg = sklm.LassoCV(random_state=0, cv=10, fit_intercept=True, normalize=True, precompute='auto', copy_X=True, n_jobs=-1, max_iter=10**9, tol=10-6)
# reg_lasso = linear_model.LassoLarsCV(cv=10, fit_intercept=True, normalize=True, precompute='auto', copy_X=True, n_jobs=-1, max_iter=10000000)
reg = sklm.RidgeCV(cv=10, fit_intercept=True, normalize=True)
# reg = sklm.ElasticNetCV(random_state=0, cv=10, fit_intercept=True, normalize=True, precompute='auto', copy_X=True, n_jobs=-1, max_iter=10**9, tol=10-6)
for i in range(n_out):
    X = datafit[['DPL1', 'EPL1', 'VOLL1', 'BILLL1', 'BONDL1', 'TERML1',
                 'CREDITL1', 'PPIGL1', 'IPGL1', 'MA112L1', 'MA312L1', 'MOM6L1']].iloc[:(n_in+i), :].values
    y = datafit['ExRet'].iloc[:(n_in+i)].values
    reg.fit(X, y)
    # print(factor_list[np.abs(reg.coef_) != 0])
    k = np.concatenate((np.array([reg.intercept_]), reg.coef_))
    f = datafit[['DP', 'EP', 'VOL', 'BILL', 'BOND', 'TERM', 'CREDIT', 'PPIG',
                 'IPG', 'MA112', 'MA312', 'MOM6']].iloc[n_in+i-1, :].values
    f = np.concatenate((np.array([1]), f))
    rreal[i] = datafit['ExRet'].iloc[n_in+i]
    rfree[i] = datafit['Rfree'].iloc[n_in+i]
    rout[i] = np.sum(k*f)
    rmean[i] = np.mean(datafit['ExRet'].iloc[:(n_in+i)].values)
    volt2[i] = np.sum(datafit['Ret'].iloc[(n_in+i-12):(n_in+i)].values**2)

print()
print('Out-of-sample tests for multi-factor model with ML method:')
print('Predictor: {:s}'.format(factor_out))
R2os, MFSEadj, pvalue_MFSEadj = myfun_stat_gains(rout, rmean, rreal)
Uout, Umean, DeltaU = myfun_econ_gains(rout, rmean, rreal, rfree, volt2, gmm=5)
del datafit

Out-of-sample tests for multi-factor model with ML method:
Predictor: DP, EP, VOL, BILL, BOND, TERM, CREDIT, PPIG, IPG, MA112, MA312, MOM6
Stat gains: R2os = 0.014761, MFSEadj = 2.583914, MFSEpvalue = 0.009956
Inference: 在1%的显著性水平下有样本外预测能力
Econ Gains: Delta U = 0.000174, Upred = 0.004246, Umean = 0.004072
Inference: 有经济意义