{
"cells": [
{
"cell_type": "code",
"execution_count": 48,
"outputs": [],
"source": [
"import pandas as pd\n",
"import math\n",
"import statsmodels.api as sm\n",
"import os\n",
"import linearmodels"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": " size share\nstk month \n8 5 1 3\n9 6 2 4",
"text/html": "
\n\n
\n \n \n | \n | \n size | \n share | \n
\n \n | stk | \n month | \n | \n | \n
\n \n \n \n | 8 | \n 5 | \n 1 | \n 3 | \n
\n \n | 9 | \n 6 | \n 2 | \n 4 | \n
\n \n
\n
"
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DF=pd.DataFrame({'stk':[8,9],'month':[5,6],'size':[1,2],'share':[3,4]})\n",
"DF=DF.set_index('stk')\n",
"DF=DF.set_index('month',append=True)\n",
"DF"
]
},
{
"cell_type": "code",
"execution_count": 50,
"outputs": [
{
"ename": "NameError",
"evalue": "name 'FamaMacBeth' is not defined",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mNameError\u001B[0m Traceback (most recent call last)",
"\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_8936\\3597670844.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 27\u001B[0m \u001B[1;31m# print(i)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 28\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mlag\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 29\u001B[1;33m \u001B[0mFamaMacBeth_summary\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mDF\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0mreg_lst\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0mreg_order\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 30\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 31\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_8936\\3597670844.py\u001B[0m in \u001B[0;36mFamaMacBeth_summary\u001B[1;34m(DF, reg_lst, reg_order, reg_names, params_format, tvalues_format)\u001B[0m\n\u001B[0;32m 16\u001B[0m \u001B[0mT\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mlen\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mdf\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mindex\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mget_level_values\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mdf\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mindex\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mnames\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0munique\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 17\u001B[0m \u001B[0mlag\u001B[0m \u001B[1;33m=\u001B[0m \u001B[1;36m4\u001B[0m\u001B[1;33m*\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;36m2\u001B[0m\u001B[1;33m/\u001B[0m\u001B[1;36m100\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m**\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;36m2\u001B[0m\u001B[1;33m/\u001B[0m\u001B[1;36m9\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 18\u001B[1;33m \u001B[0mfmb\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mFamaMacBeth\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mdf\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mreg\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0msm\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0madd_constant\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mdf\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mreg\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 19\u001B[0m \u001B[0mfmb\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mfmb\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mcov_type\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;34m'kernel'\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mbandwidth\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mlag\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 20\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;31mNameError\u001B[0m: name 'FamaMacBeth' is not defined"
]
}
],
"source": [
"reg_lst=['size','share']\n",
"reg_order=['size','share']\n",
"def FamaMacBeth_summary(DF,\n",
" reg_lst,\n",
" reg_order,\n",
" reg_names=None,\n",
" params_format='{:.3f}',\n",
" tvalues_format='{:.2f}'):\n",
"\n",
" rows = sum([[var, f'{var}_t'] for var in ['const'] + reg_order], [])##sum的两个输入参数,后面是表示初始的相加对象\n",
" if reg_names is None:\n",
" reg_names = [f'({i+1})' for i in range(len(reg_lst))]\n",
" show = pd.DataFrame(index=rows, columns=reg_names)##以rows为行reg_names为列构造dataframe\n",
" for reg, reg_name in zip(reg_lst, reg_names):\n",
" df = DF.loc[:, reg].copy()\n",
" T = len(df.index.get_level_values(df.index.names[1]).unique())\n",
" lag = 4*(2/100)**(2/9)\n",
" fmb = FamaMacBeth(df[reg[0]], sm.add_constant(df[reg[1:]]))\n",
" fmb = fmb.fit(cov_type='kernel', bandwidth=lag)\n",
"\n",
"\n",
"#zip量不同时输出都有的\n",
"# list1 = ['a', 'b', 'c']\n",
"# list2 = [1, 2,3,4]\n",
"# zipped = zip(list2, list1)\n",
"# for i in zipped:\n",
"# print(i)\n",
" return lag\n",
"FamaMacBeth_summary(DF,reg_lst,reg_order)\n",
"\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"df1 = pd.DataFrame({'x':[1,2],'y':[3,4],'z':[8,5]})\n",
"df1=df1.set_index('x')\n",
"\n",
"print(df1)\n",
"df2 = pd.DataFrame({'x':[1,6],'y':[7,8],})\n",
"df2 = df2.set_index('x')\n",
"print(df2)\n",
"df3 = pd.DataFrame({'x':[1,2],'y':[3,4],'z':[8,5]})\n",
"res1 = pd.concat([df1,df2,df3],axis=1,keys=['z','m','l'])\n",
"res1\n",
"factors=pd.merge()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 57,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" date mkt ten_return\n",
"0 2010/01/05 0:00 0.005650 -0.0062\n",
"1 2010/01/06 0:00 -0.002050 0.0318\n",
"2 2010/01/07 0:00 -0.019750 0.0154\n",
"3 2010/01/08 0:00 0.006250 -0.0318\n",
"4 2010/01/11 0:00 0.001650 0.0215\n",
"... ... ... ...\n",
"2841 2021/12/27 0:00 0.000832 0.1425\n",
"2842 2021/12/28 0:00 0.008232 0.0567\n",
"2843 2021/12/29 0:00 -0.008068 0.1075\n",
"2844 2021/12/30 0:00 0.009031 0.1670\n",
"2845 2021/12/31 0:00 0.005332 -0.0086\n",
"\n",
"[2846 rows x 3 columns]\n"
]
},
{
"data": {
"text/plain": " code date code_return\n0 1 2010-01-05 00:00:00 -0.0173\n1 2142 2010-01-05 00:00:00 0.0024\n2 600000 2010-01-05 00:00:00 0.0076\n3 600015 2010-01-05 00:00:00 0.0252\n4 600016 2010-01-05 00:00:00 0.0177\n... ... ... ...\n46006 601328 2010-08-09 00:00:00 -0.0092\n46007 601398 2010-08-09 00:00:00 -0.0047\n46008 601939 2010-08-09 00:00:00 -0.0041\n46009 601988 2010-08-09 00:00:00 -0.0057\n46010 601998 2010-08-09 00:00:00 -0.0034\n\n[46011 rows x 3 columns]",
"text/html": "\n\n
\n \n \n | \n code | \n date | \n code_return | \n
\n \n \n \n | 0 | \n 1 | \n 2010-01-05 00:00:00 | \n -0.0173 | \n
\n \n | 1 | \n 2142 | \n 2010-01-05 00:00:00 | \n 0.0024 | \n
\n \n | 2 | \n 600000 | \n 2010-01-05 00:00:00 | \n 0.0076 | \n
\n \n | 3 | \n 600015 | \n 2010-01-05 00:00:00 | \n 0.0252 | \n
\n \n | 4 | \n 600016 | \n 2010-01-05 00:00:00 | \n 0.0177 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 46006 | \n 601328 | \n 2010-08-09 00:00:00 | \n -0.0092 | \n
\n \n | 46007 | \n 601398 | \n 2010-08-09 00:00:00 | \n -0.0047 | \n
\n \n | 46008 | \n 601939 | \n 2010-08-09 00:00:00 | \n -0.0041 | \n
\n \n | 46009 | \n 601988 | \n 2010-08-09 00:00:00 | \n -0.0057 | \n
\n \n | 46010 | \n 601998 | \n 2010-08-09 00:00:00 | \n -0.0034 | \n
\n \n
\n
46011 rows × 3 columns
\n
"
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from statsmodels.regression.rolling import RollingOLS # 滚动窗口回归\n",
"import statsmodels.api as sm\n",
"ret=pd.read_csv('汇总1.csv',usecols=[0,1,2])\n",
"factors=pd.read_csv('factors.csv',usecols=[0,1,2])\n",
"factornames=['date','mkt','tenreturn']\n",
"print(factors)\n",
"res_ts={}\n",
"ret"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 61,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" code date code_return\n",
"0 1 2010-09-02 0.0388\n",
"1 2142 2010-09-02 -0.0038\n",
"2 600000 2010-09-02 0.0057\n",
"3 600015 2010-09-02 0.0017\n",
"4 600016 2010-09-02 0.0019\n",
"... ... ... ...\n",
"40651 601398 2021-12-31 -0.0022\n",
"40652 601818 2021-12-31 0.0000\n",
"40653 601939 2021-12-31 -0.0017\n",
"40654 601988 2021-12-31 0.0000\n",
"40655 601998 2021-12-31 0.0022\n",
"\n",
"[40656 rows x 3 columns]\n",
" code date code_return\n",
"0 1 2010-09-02 0.0388\n",
"1 2142 2010-09-02 -0.0038\n",
"2 600000 2010-09-02 0.0057\n",
"3 600015 2010-09-02 0.0017\n",
"4 600016 2010-09-02 0.0019\n",
"... ... ... ...\n",
"40651 601398 2021-12-31 -0.0022\n",
"40652 601818 2021-12-31 0.0000\n",
"40653 601939 2021-12-31 -0.0017\n",
"40654 601988 2021-12-31 0.0000\n",
"40655 601998 2021-12-31 0.0022\n",
"\n",
"[40656 rows x 3 columns]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\Python37\\lib\\site-packages\\ipykernel_launcher.py:5: FutureWarning: Passing 'suffixes' which cause duplicate columns {'code_return_x', 'code_x'} in the result is deprecated and will raise a MergeError in a future version.\n",
" \"\"\"\n"
]
},
{
"data": {
"text/plain": " date mkt ten_return\n0 2010-09-02 0.018731 -0.0106\n1 2010-09-03 0.005331 -0.0010\n2 2010-09-06 -0.000469 -0.0174\n3 2010-09-07 0.007131 -0.0870\n4 2010-09-08 0.007731 -0.0058\n... ... ... ...\n2536 2021-12-27 0.000832 0.1425\n2537 2021-12-28 0.008232 0.0567\n2538 2021-12-29 -0.008068 0.1075\n2539 2021-12-30 0.009031 0.1670\n2540 2021-12-31 0.005332 -0.0086\n\n[2541 rows x 3 columns]",
"text/html": "\n\n
\n \n \n | \n date | \n mkt | \n ten_return | \n
\n \n \n \n | 0 | \n 2010-09-02 | \n 0.018731 | \n -0.0106 | \n
\n \n | 1 | \n 2010-09-03 | \n 0.005331 | \n -0.0010 | \n
\n \n | 2 | \n 2010-09-06 | \n -0.000469 | \n -0.0174 | \n
\n \n | 3 | \n 2010-09-07 | \n 0.007131 | \n -0.0870 | \n
\n \n | 4 | \n 2010-09-08 | \n 0.007731 | \n -0.0058 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 2536 | \n 2021-12-27 | \n 0.000832 | \n 0.1425 | \n
\n \n | 2537 | \n 2021-12-28 | \n 0.008232 | \n 0.0567 | \n
\n \n | 2538 | \n 2021-12-29 | \n -0.008068 | \n 0.1075 | \n
\n \n | 2539 | \n 2021-12-30 | \n 0.009031 | \n 0.1670 | \n
\n \n | 2540 | \n 2021-12-31 | \n 0.005332 | \n -0.0086 | \n
\n \n
\n
2541 rows × 3 columns
\n
"
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(ret)\n",
"codelist=ret['code'].unique()\n",
"first=ret[ret['code']==codelist[0]]\n",
"for i in codelist[1:]:\n",
" first=pd.merge(left=first,right=ret[ret['code']==i],how='inner',on='date')\n",
"first['date']\n",
"factors['date']=pd.to_datetime(factors['date'],format='%Y/%m/%d %H:%M:%S')\n",
"first['date']=pd.to_datetime(first['date'],format='%Y-%m-%d %H:%M:%S')\n",
"ret['date']=pd.to_datetime(ret['date'],format='%Y-%m-%d %H:%M:%S')\n",
"\n",
"ret=pd.merge(left=ret,right=first['date'],how='inner',on='date')\n",
"factors=pd.merge(left=factors,right=first['date'],how='inner',on='date')\n",
"print(ret)\n",
"factors"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 59,
"outputs": [
{
"data": {
"text/plain": "1"
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"1"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 63,
"outputs": [
{
"data": {
"text/plain": " mkt ten_return\ncode \n1 0 NaN NaN\n 1 NaN NaN\n 2 NaN NaN\n 3 NaN NaN\n 4 NaN NaN\n... ... ...\n601998 2536 0.073522 -0.007918\n 2537 0.088027 -0.007224\n 2538 0.098329 -0.007488\n 2539 0.069860 -0.009307\n 2540 0.064703 -0.008807\n\n[40656 rows x 2 columns]",
"text/html": "\n\n
\n \n \n | \n | \n mkt | \n ten_return | \n
\n \n | code | \n | \n | \n | \n
\n \n \n \n | 1 | \n 0 | \n NaN | \n NaN | \n
\n \n | 1 | \n NaN | \n NaN | \n
\n \n | 2 | \n NaN | \n NaN | \n
\n \n | 3 | \n NaN | \n NaN | \n
\n \n | 4 | \n NaN | \n NaN | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 601998 | \n 2536 | \n 0.073522 | \n -0.007918 | \n
\n \n | 2537 | \n 0.088027 | \n -0.007224 | \n
\n \n | 2538 | \n 0.098329 | \n -0.007488 | \n
\n \n | 2539 | \n 0.069860 | \n -0.009307 | \n
\n \n | 2540 | \n 0.064703 | \n -0.008807 | \n
\n \n
\n
40656 rows × 2 columns
\n
"
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"factors=factors[['mkt','ten_return']]\n",
"for stockcode,stockret in ret.groupby('code'):\n",
" trans=stockret['code_return'].tolist()\n",
" trans=pd.DataFrame(trans,columns=['code_return'])\n",
" mod_ts = RollingOLS(trans,sm.add_constant(factors),window=24).fit(cov_type='HAC',use_t=True)\n",
" res_ts[stockcode] = mod_ts.params\n",
"# 获取因子载荷序列\n",
"\n",
"betas = pd.concat({k: pd.DataFrame(v) for k, v in res_ts.items()},axis=0,names=['code']) # 所有的参数估计值。这里的names只能对已经有了的索引进行命名\n",
"loadings = betas.drop('const',axis=1) # 去掉常数项\n",
"loadings\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 64,
"outputs": [
{
"data": {
"text/plain": " date code_return\ncode \n1 0 2010-09-02 0.0388\n2142 1 2010-09-02 -0.0038\n600000 2 2010-09-02 0.0057\n600015 3 2010-09-02 0.0017\n600016 4 2010-09-02 0.0019\n... ... ...\n601398 40651 2021-12-31 -0.0022\n601818 40652 2021-12-31 0.0000\n601939 40653 2021-12-31 -0.0017\n601988 40654 2021-12-31 0.0000\n601998 40655 2021-12-31 0.0022\n\n[40656 rows x 2 columns]",
"text/html": "\n\n
\n \n \n | \n | \n date | \n code_return | \n
\n \n | code | \n | \n | \n | \n
\n \n \n \n | 1 | \n 0 | \n 2010-09-02 | \n 0.0388 | \n
\n \n | 2142 | \n 1 | \n 2010-09-02 | \n -0.0038 | \n
\n \n | 600000 | \n 2 | \n 2010-09-02 | \n 0.0057 | \n
\n \n | 600015 | \n 3 | \n 2010-09-02 | \n 0.0017 | \n
\n \n | 600016 | \n 4 | \n 2010-09-02 | \n 0.0019 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 601398 | \n 40651 | \n 2021-12-31 | \n -0.0022 | \n
\n \n | 601818 | \n 40652 | \n 2021-12-31 | \n 0.0000 | \n
\n \n | 601939 | \n 40653 | \n 2021-12-31 | \n -0.0017 | \n
\n \n | 601988 | \n 40654 | \n 2021-12-31 | \n 0.0000 | \n
\n \n | 601998 | \n 40655 | \n 2021-12-31 | \n 0.0022 | \n
\n \n
\n
40656 rows × 2 columns
\n
"
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp = ret.set_index('code',append=True).swaplevel()\n",
"temp"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 65,
"outputs": [
{
"data": {
"text/plain": " date code_return\ncode \n1 0 NaT NaN\n2142 1 NaT NaN\n600000 2 NaT NaN\n600015 3 NaT NaN\n600016 4 NaT NaN\n... ... ...\n601398 40651 2021-12-30 -0.0043\n601818 40652 2021-12-30 0.0000\n601939 40653 2021-12-30 0.0000\n601988 40654 2021-12-30 0.0000\n601998 40655 2021-12-30 -0.0043\n\n[40656 rows x 2 columns]",
"text/html": "\n\n
\n \n \n | \n | \n date | \n code_return | \n
\n \n | code | \n | \n | \n | \n
\n \n \n \n | 1 | \n 0 | \n NaT | \n NaN | \n
\n \n | 2142 | \n 1 | \n NaT | \n NaN | \n
\n \n | 600000 | \n 2 | \n NaT | \n NaN | \n
\n \n | 600015 | \n 3 | \n NaT | \n NaN | \n
\n \n | 600016 | \n 4 | \n NaT | \n NaN | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 601398 | \n 40651 | \n 2021-12-30 | \n -0.0043 | \n
\n \n | 601818 | \n 40652 | \n 2021-12-30 | \n 0.0000 | \n
\n \n | 601939 | \n 40653 | \n 2021-12-30 | \n 0.0000 | \n
\n \n | 601988 | \n 40654 | \n 2021-12-30 | \n 0.0000 | \n
\n \n | 601998 | \n 40655 | \n 2021-12-30 | \n -0.0043 | \n
\n \n
\n
40656 rows × 2 columns
\n
"
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp = temp.groupby('code').shift(1)\n",
"temp"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 55,
"outputs": [
{
"ename": "KeyError",
"evalue": "'date'",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)",
"\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_8936\\2619771353.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mfmdata\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mpd\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmerge\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtemp\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdropna\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mreset_index\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0mloadings\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mreset_index\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0mon\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;34m'code'\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;34m'date'\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 2\u001B[0m \u001B[0mfmdata\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;32mE:\\Python37\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001B[0m in \u001B[0;36mmerge\u001B[1;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001B[0m\n\u001B[0;32m 117\u001B[0m \u001B[0mcopy\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mcopy\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 118\u001B[0m \u001B[0mindicator\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mindicator\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 119\u001B[1;33m \u001B[0mvalidate\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mvalidate\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 120\u001B[0m )\n\u001B[0;32m 121\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mop\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mget_result\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;32mE:\\Python37\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001B[0m in \u001B[0;36m__init__\u001B[1;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001B[0m\n\u001B[0;32m 697\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mright_join_keys\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 698\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mjoin_names\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 699\u001B[1;33m ) = self._get_merge_keys()\n\u001B[0m\u001B[0;32m 700\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 701\u001B[0m \u001B[1;31m# validate the merge keys dtypes. We may need to coerce\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;32mE:\\Python37\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001B[0m in \u001B[0;36m_get_merge_keys\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 1094\u001B[0m \u001B[1;32mif\u001B[0m \u001B[1;32mnot\u001B[0m \u001B[0mis_rkey\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mrk\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 1095\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mrk\u001B[0m \u001B[1;32mis\u001B[0m \u001B[1;32mnot\u001B[0m \u001B[1;32mNone\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m-> 1096\u001B[1;33m \u001B[0mright_keys\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mappend\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mright\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_get_label_or_level_values\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mrk\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 1097\u001B[0m \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 1098\u001B[0m \u001B[1;31m# work-around for merge_asof(right_index=True)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;32mE:\\Python37\\lib\\site-packages\\pandas\\core\\generic.py\u001B[0m in \u001B[0;36m_get_label_or_level_values\u001B[1;34m(self, key, axis)\u001B[0m\n\u001B[0;32m 1777\u001B[0m \u001B[0mvalues\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0maxes\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0maxis\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mget_level_values\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_values\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 1778\u001B[0m \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m-> 1779\u001B[1;33m \u001B[1;32mraise\u001B[0m \u001B[0mKeyError\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 1780\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 1781\u001B[0m \u001B[1;31m# Check for duplicates\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;31mKeyError\u001B[0m: 'date'"
]
}
],
"source": [
"fmdata = pd.merge(temp.dropna().reset_index(),loadings.reset_index(),on=['code','date'])\n",
"fmdata"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [],
"source": [
"import pandas as pd"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"data": {
"text/plain": " stkcode date monret size ep\n0 1 200001 0.0619 1.985739e+10 0.019309\n1 1 200002 -0.0113 1.963234e+10 0.019531\n2 1 200003 0.0027 1.968592e+10 0.019478\n3 1 200004 0.0370 2.041464e+10 0.017718\n4 1 200005 -0.0551 1.928942e+10 0.018751\n... ... ... ... ... ...\n592718 900957 202108 -0.0589 1.116880e+08 0.018720\n592719 900957 202109 0.0906 1.218080e+08 0.017120\n592720 900957 202110 -0.0408 1.168400e+08 0.015647\n592721 900957 202111 -0.0409 1.120560e+08 0.016345\n592722 900957 202112 0.0361 1.161040e+08 0.015783\n\n[571479 rows x 5 columns]",
"text/html": "\n\n
\n \n \n | \n stkcode | \n date | \n monret | \n size | \n ep | \n
\n \n \n \n | 0 | \n 1 | \n 200001 | \n 0.0619 | \n 1.985739e+10 | \n 0.019309 | \n
\n \n | 1 | \n 1 | \n 200002 | \n -0.0113 | \n 1.963234e+10 | \n 0.019531 | \n
\n \n | 2 | \n 1 | \n 200003 | \n 0.0027 | \n 1.968592e+10 | \n 0.019478 | \n
\n \n | 3 | \n 1 | \n 200004 | \n 0.0370 | \n 2.041464e+10 | \n 0.017718 | \n
\n \n | 4 | \n 1 | \n 200005 | \n -0.0551 | \n 1.928942e+10 | \n 0.018751 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 592718 | \n 900957 | \n 202108 | \n -0.0589 | \n 1.116880e+08 | \n 0.018720 | \n
\n \n | 592719 | \n 900957 | \n 202109 | \n 0.0906 | \n 1.218080e+08 | \n 0.017120 | \n
\n \n | 592720 | \n 900957 | \n 202110 | \n -0.0408 | \n 1.168400e+08 | \n 0.015647 | \n
\n \n | 592721 | \n 900957 | \n 202111 | \n -0.0409 | \n 1.120560e+08 | \n 0.016345 | \n
\n \n | 592722 | \n 900957 | \n 202112 | \n 0.0361 | \n 1.161040e+08 | \n 0.015783 | \n
\n \n
\n
571479 rows × 5 columns
\n
"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"monret=pd.read_csv('RESSET_MRESSTK_all.csv',usecols=[0,2,3,4,5,6],encoding='utf-8')\n",
"beta=pd.read_csv('RESSET_SMONRETBETA_BFDT12_all.csv',encoding='utf-8')\n",
"monret.columns=['stkcode','date','close','tshare','monret','pe']\n",
"beta.columns=['stkcode','date','beta']\n",
"monret['date']=pd.to_datetime(monret['date'],format='%Y/%m/%d')\n",
"monret['date']=monret['date'].dt.strftime('%Y%m').astype(int)\n",
"monret['size']=monret['close']*monret['tshare']\n",
"monret['ep']=1/monret['pe']\n",
"monret=monret[['stkcode','date','monret','size','ep']]\n",
"monret.dropna(inplace=True)\n",
"monret"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": " stkcode date beta\n0 1 200001 1.3269\n1 1 200002 1.2640\n2 1 200003 1.2676\n3 1 200004 1.2784\n4 1 200005 1.1890\n... ... ... ...\n535164 900957 202108 -0.6000\n535165 900957 202109 -0.9615\n535166 900957 202110 -0.7221\n535167 900957 202111 -1.4672\n535168 900957 202112 -1.3773\n\n[535169 rows x 3 columns]",
"text/html": "\n\n
\n \n \n | \n stkcode | \n date | \n beta | \n
\n \n \n \n | 0 | \n 1 | \n 200001 | \n 1.3269 | \n
\n \n | 1 | \n 1 | \n 200002 | \n 1.2640 | \n
\n \n | 2 | \n 1 | \n 200003 | \n 1.2676 | \n
\n \n | 3 | \n 1 | \n 200004 | \n 1.2784 | \n
\n \n | 4 | \n 1 | \n 200005 | \n 1.1890 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 535164 | \n 900957 | \n 202108 | \n -0.6000 | \n
\n \n | 535165 | \n 900957 | \n 202109 | \n -0.9615 | \n
\n \n | 535166 | \n 900957 | \n 202110 | \n -0.7221 | \n
\n \n | 535167 | \n 900957 | \n 202111 | \n -1.4672 | \n
\n \n | 535168 | \n 900957 | \n 202112 | \n -1.3773 | \n
\n \n
\n
535169 rows × 3 columns
\n
"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"beta['date']=pd.to_datetime(beta['date'],format='%Y/%m/%d')\n",
"beta['date']=beta['date'].dt.strftime('%Y%m').astype(int)\n",
"beta.dropna()\n",
"beta"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": " stkcode date monret size ep beta\n0 1 200001 0.0619 1.985739e+10 0.019309 1.3269\n1 1 200002 -0.0113 1.963234e+10 0.019531 1.2640\n2 1 200003 0.0027 1.968592e+10 0.019478 1.2676\n3 1 200004 0.0370 2.041464e+10 0.017718 1.2784\n4 1 200005 -0.0551 1.928942e+10 0.018751 1.1890\n... ... ... ... ... ... ...\n534365 900957 202108 -0.0589 1.116880e+08 0.018720 -0.6000\n534366 900957 202109 0.0906 1.218080e+08 0.017120 -0.9615\n534367 900957 202110 -0.0408 1.168400e+08 0.015647 -0.7221\n534368 900957 202111 -0.0409 1.120560e+08 0.016345 -1.4672\n534369 900957 202112 0.0361 1.161040e+08 0.015783 -1.3773\n\n[534370 rows x 6 columns]",
"text/html": "\n\n
\n \n \n | \n stkcode | \n date | \n monret | \n size | \n ep | \n beta | \n
\n \n \n \n | 0 | \n 1 | \n 200001 | \n 0.0619 | \n 1.985739e+10 | \n 0.019309 | \n 1.3269 | \n
\n \n | 1 | \n 1 | \n 200002 | \n -0.0113 | \n 1.963234e+10 | \n 0.019531 | \n 1.2640 | \n
\n \n | 2 | \n 1 | \n 200003 | \n 0.0027 | \n 1.968592e+10 | \n 0.019478 | \n 1.2676 | \n
\n \n | 3 | \n 1 | \n 200004 | \n 0.0370 | \n 2.041464e+10 | \n 0.017718 | \n 1.2784 | \n
\n \n | 4 | \n 1 | \n 200005 | \n -0.0551 | \n 1.928942e+10 | \n 0.018751 | \n 1.1890 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 534365 | \n 900957 | \n 202108 | \n -0.0589 | \n 1.116880e+08 | \n 0.018720 | \n -0.6000 | \n
\n \n | 534366 | \n 900957 | \n 202109 | \n 0.0906 | \n 1.218080e+08 | \n 0.017120 | \n -0.9615 | \n
\n \n | 534367 | \n 900957 | \n 202110 | \n -0.0408 | \n 1.168400e+08 | \n 0.015647 | \n -0.7221 | \n
\n \n | 534368 | \n 900957 | \n 202111 | \n -0.0409 | \n 1.120560e+08 | \n 0.016345 | \n -1.4672 | \n
\n \n | 534369 | \n 900957 | \n 202112 | \n 0.0361 | \n 1.161040e+08 | \n 0.015783 | \n -1.3773 | \n
\n \n
\n
534370 rows × 6 columns
\n
"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"panel=pd.merge(left=monret,right=beta,on=['stkcode','date'],how='inner')\n",
"panel"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"data": {
"text/plain": " const size ep beta\n0 -0.028908 2.706794e-11 0.151357 0.150815\n1 0.088102 1.410143e-11 -0.044479 0.018328\n2 0.177329 -1.374837e-11 -0.306896 -0.054933\n3 0.037395 1.351040e-11 0.155326 -0.042211\n4 0.069028 -2.060338e-11 0.136973 0.018272\n.. ... ... ... ...\n259 -0.021313 1.262353e-13 -0.013304 -0.003581\n260 0.083763 -2.534541e-13 -0.025108 0.021680\n261 0.038268 -3.333640e-14 -0.071991 0.005694\n262 0.057655 -1.050947e-13 0.107436 0.099658\n263 0.111225 -1.832814e-13 -0.389525 -0.007620\n\n[264 rows x 4 columns]",
"text/html": "\n\n
\n \n \n | \n const | \n size | \n ep | \n beta | \n
\n \n \n \n | 0 | \n -0.028908 | \n 2.706794e-11 | \n 0.151357 | \n 0.150815 | \n
\n \n | 1 | \n 0.088102 | \n 1.410143e-11 | \n -0.044479 | \n 0.018328 | \n
\n \n | 2 | \n 0.177329 | \n -1.374837e-11 | \n -0.306896 | \n -0.054933 | \n
\n \n | 3 | \n 0.037395 | \n 1.351040e-11 | \n 0.155326 | \n -0.042211 | \n
\n \n | 4 | \n 0.069028 | \n -2.060338e-11 | \n 0.136973 | \n 0.018272 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 259 | \n -0.021313 | \n 1.262353e-13 | \n -0.013304 | \n -0.003581 | \n
\n \n | 260 | \n 0.083763 | \n -2.534541e-13 | \n -0.025108 | \n 0.021680 | \n
\n \n | 261 | \n 0.038268 | \n -3.333640e-14 | \n -0.071991 | \n 0.005694 | \n
\n \n | 262 | \n 0.057655 | \n -1.050947e-13 | \n 0.107436 | \n 0.099658 | \n
\n \n | 263 | \n 0.111225 | \n -1.832814e-13 | \n -0.389525 | \n -0.007620 | \n
\n \n
\n
264 rows × 4 columns
\n
"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import statsmodels.api as sm\n",
"coefficient=[]\n",
"for date in panel['date'].unique():\n",
" panel_date=panel[panel['date']==date]\n",
" X=panel_date[['size','ep','beta']]\n",
" X=sm.add_constant(X)\n",
" Y=panel_date['monret']\n",
" result=sm.OLS(Y,X).fit()\n",
" coefficient.append(result.params)\n",
"coefficient=pd.DataFrame(coefficient,columns=['const','size','ep','beta'])\n",
"coefficient"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"data": {
"text/plain": "['const: t_statistic:0.7631101972892858 p_value:0.4460816261704841',\n 'size: t_statistic:1.9299761192647984 p_value:0.05468514305777287',\n 'ep: t_statistic:-0.8640830459728995 p_value:0.3883296475253617',\n 'beta: t_statistic:2.0395581163718233 p_value:0.0423937550458561']"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from scipy import stats\n",
"result=[]\n",
"for factor in coefficient.columns:\n",
" t_test_result, p_value=stats.ttest_1samp(coefficient[factor],0)\n",
" result.append(factor+': t_statistic:'+str(t_test_result)+' p_value:'+str(p_value))\n",
"result"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 124,
"outputs": [
{
"data": {
"text/plain": " stkcode date monret beta\n0 1 200001 0.0619 1.3269\n1 1 200002 -0.0113 1.2640\n2 1 200003 0.0027 1.2676\n3 1 200004 0.0370 1.2784\n4 1 200005 -0.0551 1.1890\n... ... ... ... ...\n535164 900957 202108 -0.0589 -0.6000\n535165 900957 202109 0.0906 -0.9615\n535166 900957 202110 -0.0408 -0.7221\n535167 900957 202111 -0.0409 -1.4672\n535168 900957 202112 0.0361 -1.3773\n\n[535169 rows x 4 columns]",
"text/html": "\n\n
\n \n \n | \n stkcode | \n date | \n monret | \n beta | \n
\n \n \n \n | 0 | \n 1 | \n 200001 | \n 0.0619 | \n 1.3269 | \n
\n \n | 1 | \n 1 | \n 200002 | \n -0.0113 | \n 1.2640 | \n
\n \n | 2 | \n 1 | \n 200003 | \n 0.0027 | \n 1.2676 | \n
\n \n | 3 | \n 1 | \n 200004 | \n 0.0370 | \n 1.2784 | \n
\n \n | 4 | \n 1 | \n 200005 | \n -0.0551 | \n 1.1890 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 535164 | \n 900957 | \n 202108 | \n -0.0589 | \n -0.6000 | \n
\n \n | 535165 | \n 900957 | \n 202109 | \n 0.0906 | \n -0.9615 | \n
\n \n | 535166 | \n 900957 | \n 202110 | \n -0.0408 | \n -0.7221 | \n
\n \n | 535167 | \n 900957 | \n 202111 | \n -0.0409 | \n -1.4672 | \n
\n \n | 535168 | \n 900957 | \n 202112 | \n 0.0361 | \n -1.3773 | \n
\n \n
\n
535169 rows × 4 columns
\n
"
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merge"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 125,
"outputs": [
{
"data": {
"text/plain": " stkcode date monret beta\n0 1 200001 0.0619 1.3269\n1 1 200002 -0.0113 1.2640\n2 1 200003 0.0027 1.2676\n3 1 200004 0.0370 1.2784\n4 1 200005 -0.0551 1.1890\n... ... ... ... ...\n535164 900957 202108 -0.0589 -0.6000\n535165 900957 202109 0.0906 -0.9615\n535166 900957 202110 -0.0408 -0.7221\n535167 900957 202111 -0.0409 -1.4672\n535168 900957 202112 0.0361 -1.3773\n\n[535169 rows x 4 columns]",
"text/html": "\n\n
\n \n \n | \n stkcode | \n date | \n monret | \n beta | \n
\n \n \n \n | 0 | \n 1 | \n 200001 | \n 0.0619 | \n 1.3269 | \n
\n \n | 1 | \n 1 | \n 200002 | \n -0.0113 | \n 1.2640 | \n
\n \n | 2 | \n 1 | \n 200003 | \n 0.0027 | \n 1.2676 | \n
\n \n | 3 | \n 1 | \n 200004 | \n 0.0370 | \n 1.2784 | \n
\n \n | 4 | \n 1 | \n 200005 | \n -0.0551 | \n 1.1890 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 535164 | \n 900957 | \n 202108 | \n -0.0589 | \n -0.6000 | \n
\n \n | 535165 | \n 900957 | \n 202109 | \n 0.0906 | \n -0.9615 | \n
\n \n | 535166 | \n 900957 | \n 202110 | \n -0.0408 | \n -0.7221 | \n
\n \n | 535167 | \n 900957 | \n 202111 | \n -0.0409 | \n -1.4672 | \n
\n \n | 535168 | \n 900957 | \n 202112 | \n 0.0361 | \n -1.3773 | \n
\n \n
\n
535169 rows × 4 columns
\n
"
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"panel=merge\n",
"panel\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 126,
"outputs": [
{
"data": {
"text/plain": " stkcode date monret beta\n0 1 2000-01-01 0.0619 1.3269\n1 1 2000-02-01 -0.0113 1.2640\n2 1 2000-03-01 0.0027 1.2676\n3 1 2000-04-01 0.0370 1.2784\n4 1 2000-05-01 -0.0551 1.1890\n... ... ... ... ...\n535164 900957 2021-08-01 -0.0589 -0.6000\n535165 900957 2021-09-01 0.0906 -0.9615\n535166 900957 2021-10-01 -0.0408 -0.7221\n535167 900957 2021-11-01 -0.0409 -1.4672\n535168 900957 2021-12-01 0.0361 -1.3773\n\n[535169 rows x 4 columns]",
"text/html": "\n\n
\n \n \n | \n stkcode | \n date | \n monret | \n beta | \n
\n \n \n \n | 0 | \n 1 | \n 2000-01-01 | \n 0.0619 | \n 1.3269 | \n
\n \n | 1 | \n 1 | \n 2000-02-01 | \n -0.0113 | \n 1.2640 | \n
\n \n | 2 | \n 1 | \n 2000-03-01 | \n 0.0027 | \n 1.2676 | \n
\n \n | 3 | \n 1 | \n 2000-04-01 | \n 0.0370 | \n 1.2784 | \n
\n \n | 4 | \n 1 | \n 2000-05-01 | \n -0.0551 | \n 1.1890 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 535164 | \n 900957 | \n 2021-08-01 | \n -0.0589 | \n -0.6000 | \n
\n \n | 535165 | \n 900957 | \n 2021-09-01 | \n 0.0906 | \n -0.9615 | \n
\n \n | 535166 | \n 900957 | \n 2021-10-01 | \n -0.0408 | \n -0.7221 | \n
\n \n | 535167 | \n 900957 | \n 2021-11-01 | \n -0.0409 | \n -1.4672 | \n
\n \n | 535168 | \n 900957 | \n 2021-12-01 | \n 0.0361 | \n -1.3773 | \n
\n \n
\n
535169 rows × 4 columns
\n
"
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"panel['date']=pd.to_datetime(panel['date'],format='%Y%m')##format不要忘了!!!!!!!\n",
"panel"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 127,
"outputs": [
{
"data": {
"text/plain": " monret beta\nstkcode date \n1 2000-01-01 0.0619 1.3269\n 2000-02-01 -0.0113 1.2640\n 2000-03-01 0.0027 1.2676\n 2000-04-01 0.0370 1.2784\n 2000-05-01 -0.0551 1.1890\n... ... ...\n900957 2021-08-01 -0.0589 -0.6000\n 2021-09-01 0.0906 -0.9615\n 2021-10-01 -0.0408 -0.7221\n 2021-11-01 -0.0409 -1.4672\n 2021-12-01 0.0361 -1.3773\n\n[535169 rows x 2 columns]",
"text/html": "\n\n
\n \n \n | \n | \n monret | \n beta | \n
\n \n | stkcode | \n date | \n | \n | \n
\n \n \n \n | 1 | \n 2000-01-01 | \n 0.0619 | \n 1.3269 | \n
\n \n | 2000-02-01 | \n -0.0113 | \n 1.2640 | \n
\n \n | 2000-03-01 | \n 0.0027 | \n 1.2676 | \n
\n \n | 2000-04-01 | \n 0.0370 | \n 1.2784 | \n
\n \n | 2000-05-01 | \n -0.0551 | \n 1.1890 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n
\n \n | 900957 | \n 2021-08-01 | \n -0.0589 | \n -0.6000 | \n
\n \n | 2021-09-01 | \n 0.0906 | \n -0.9615 | \n
\n \n | 2021-10-01 | \n -0.0408 | \n -0.7221 | \n
\n \n | 2021-11-01 | \n -0.0409 | \n -1.4672 | \n
\n \n | 2021-12-01 | \n 0.0361 | \n -1.3773 | \n
\n \n
\n
535169 rows × 2 columns
\n
"
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"panel=panel.set_index(['stkcode','date'])\n",
"panel"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 132,
"outputs": [
{
"data": {
"text/plain": "stkcode date \n1 2000-01-01 0.0619\n 2000-02-01 -0.0113\n 2000-03-01 0.0027\n 2000-04-01 0.0370\n 2000-05-01 -0.0551\n ... \n900957 2021-08-01 -0.0589\n 2021-09-01 0.0906\n 2021-10-01 -0.0408\n 2021-11-01 -0.0409\n 2021-12-01 0.0361\nName: monret, Length: 535169, dtype: float64"
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"newmonret=panel['monret']\n",
"newmonret"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 133,
"outputs": [
{
"data": {
"text/plain": "stkcode date \n1 2000-01-01 1.3269\n 2000-02-01 1.2640\n 2000-03-01 1.2676\n 2000-04-01 1.2784\n 2000-05-01 1.1890\n ... \n900957 2021-08-01 -0.6000\n 2021-09-01 -0.9615\n 2021-10-01 -0.7221\n 2021-11-01 -1.4672\n 2021-12-01 -1.3773\nName: beta, Length: 535169, dtype: float64"
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"newbeta=panel['beta']\n",
"newbeta"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 135,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\Python37\\lib\\site-packages\\linearmodels\\shared\\exceptions.py:37: MissingValueWarning: \n",
"Inputs contain missing values. Dropping rows with missing observations.\n",
" warnings.warn(missing_value_warning_msg, MissingValueWarning)\n"
]
},
{
"data": {
"text/plain": "\n\"\"\"\n FamaMacBeth Estimation Summary \n=======================================================================================\nDep. Variable: monret R-squared: 0.0007\nEstimator: FamaMacBeth R-squared (Between): 0.0008\nNo. Observations: 535157 R-squared (Within): 0.0005\nDate: Mon, Apr 17 2023 R-squared (Overall): 0.0007\nTime: 14:36:40 Log-likelihood 2.489e+05\nCov. Estimator: Fama-MacBeth Standard Cov \n F-statistic: 369.58\nEntities: 4075 P-value 0.0000\nAvg Obs: 131.33 Distribution: F(1,535155)\nMin Obs: 1.0000 \nMax Obs: 264.00 F-statistic (robust): 3.7894\n P-value 0.0516\nTime periods: 264 Distribution: F(1,535155)\nAvg Obs: 2027.1 \nMin Obs: 840.00 \nMax Obs: 4008.0 \n \n Parameter Estimates \n==============================================================================\n Parameter Std. Err. T-stat P-value Lower CI Upper CI\n------------------------------------------------------------------------------\nconst 0.0039 0.0037 1.0558 0.2910 -0.0034 0.0113\nbeta 0.0096 0.0049 1.9466 0.0516 -6.59e-05 0.0193\n==============================================================================\n\"\"\"",
"text/html": "\nFamaMacBeth Estimation Summary\n\n | Dep. Variable: | monret | R-squared: | 0.0007 | \n
\n\n | Estimator: | FamaMacBeth | R-squared (Between): | 0.0008 | \n
\n\n | No. Observations: | 535157 | R-squared (Within): | 0.0005 | \n
\n\n | Date: | Mon, Apr 17 2023 | R-squared (Overall): | 0.0007 | \n
\n\n | Time: | 14:36:40 | Log-likelihood | 2.489e+05 | \n
\n\n | Cov. Estimator: | Fama-MacBeth Standard Cov | | | \n
\n\n | | F-statistic: | 369.58 | \n
\n\n | Entities: | 4075 | P-value | 0.0000 | \n
\n\n | Avg Obs: | 131.33 | Distribution: | F(1,535155) | \n
\n\n | Min Obs: | 1.0000 | | | \n
\n\n | Max Obs: | 264.00 | F-statistic (robust): | 3.7894 | \n
\n\n | | P-value | 0.0516 | \n
\n\n | Time periods: | 264 | Distribution: | F(1,535155) | \n
\n\n | Avg Obs: | 2027.1 | | | \n
\n\n | Min Obs: | 840.00 | | | \n
\n\n | Max Obs: | 4008.0 | | | \n
\n\n | | | | \n
\n
\n\nParameter Estimates\n\n | Parameter | Std. Err. | T-stat | P-value | Lower CI | Upper CI | \n
\n\n | const | 0.0039 | 0.0037 | 1.0558 | 0.2910 | -0.0034 | 0.0113 | \n
\n\n | beta | 0.0096 | 0.0049 | 1.9466 | 0.0516 | -6.59e-05 | 0.0193 | \n
\n
"
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mod=FamaMacBeth(newmonret,sm.add_constant(newbeta))\n",
"res=mod.fit()\n",
"res.summary\n",
"?"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 136,
"outputs": [],
"source": [
"?FamaMacBeth"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"data": {
"text/plain": " a\n0 1\n1 2\n2 3",
"text/html": "\n\n
\n \n \n | \n a | \n
\n \n \n \n | 0 | \n 1 | \n
\n \n | 1 | \n 2 | \n
\n \n | 2 | \n 3 | \n
\n \n
\n
"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"m=pd.DataFrame({'a':[1,2,3],'b':[1,2,3]})\n",
"l=m.loc[:,['a']]\n",
"l"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1, 1)\n",
"(2, 2)\n",
"(3, 3)\n"
]
}
],
"source": [
"a=[1,2,3]\n",
"b=[1,2,3]\n",
"l=zip(a,b)\n",
"l\n",
"for m,p in l:\n",
" print(m,p)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"language": "python",
"display_name": "Python 3 (ipykernel)"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}