#coding=utf-8
# Notebook setup: inline plotting, font configuration and the core imports.
# NOTE(review): the line below is an IPython magic, so this file only runs under
# IPython/Jupyter and not as a plain Python module.
%matplotlib inline
import matplotlib.pyplot as plt
import sys
from imp import reload
# NOTE(review): reloading sys looks like a leftover of the Python 2
# "setdefaultencoding" idiom; nothing visible in this file depends on it -- confirm.
reload(sys)
# Select the SimHei font family at size 13 (presumably so the Chinese titles
# and labels used below render correctly).
plt.rc('font', family='SimHei', size=13)
# NOTE(review): in a plain .py module a __future__ import must precede every
# other statement; it is only accepted here when cells are compiled separately.
from __future__ import division
import pandas as pd
from IPython.display import Image
import pandas as pd
import numpy as np
# Load the problem-platform records and index them by the 'proTime' column
# so they can be treated as a time series.
problemPlat=pd.read_csv('problemPlat.csv',parse_dates=True)
problemPlat.index=problemPlat['proTime']
# Convert the index to datetime. Index.to_datetime() was removed from pandas;
# pd.to_datetime() is the supported spelling and is what the rest of this
# script already uses.
problemPlat.index=pd.to_datetime(problemPlat.index)
problemPlat
# Monthly count of platforms that ran into problems, 2012 through 2017.
# resample(..., how='count') was removed from pandas; aggregate on the
# resampler object instead.
problemPlat['id']['2012':'2017'].resample('M').count().plot(title='P2P发生问题')
# Break the problem platforms down by problem category ('type' column) and plot
# each category's monthly count on its own subplot with a shared y axis.
_problem_types=['跑路','停业','提现困难','经侦介入','转型']
# data1..data5 are kept as module-level names, one frame per category, in the
# same order as before.
data1,data2,data3,data4,data5=[problemPlat[problemPlat['type']==_t] for _t in _problem_types]
figure,axes=plt.subplots(nrows=5,ncols=1,sharey=True,figsize=(12,7))
for _ax,_kind,_subset in zip(axes,_problem_types,(data1,data2,data3,data4,data5)):
    # resample(..., how='count') was removed from pandas; call .count() on the
    # resampler instead. The label is "<category>平台数量", as in the original.
    _subset['id']['2012':'2017'].resample('M').count().plot(label=_kind+'平台数量',ax=_ax)
    _ax.set_title(_kind)
# NOTE(review): top=3 places the top of the subplot area well beyond the figure
# height; kept as in the original -- confirm this is intended.
plt.subplots_adjust(top=3)
Image(filename='各类问题平台变化趋势.png', width=1024)
# Number of problem platforms per 'area' value, sorted ascending by count and
# drawn as a bar chart.
problemPlat.groupby('area').count().sort_values('id')['id'].plot(kind='bar',figsize=(12,7),title='问题平台各省分布')
plt.legend(loc='best')
# The three figures below are loaded from pre-rendered image files rather than
# computed here.
Image(filename='P2P各省问题平台数量分布.png', width=1024)# map distribution
Image(filename='P2P问题平台各省注册资本总和(单位:万元).png', width=1024)# registered capital used as the measure of size
Image(filename='P2P问题平台类型占比.png', width=1024)
# Detailed platform information; per the original author's note most fields are missing.
problemPlatDetail=pd.read_csv('problemPlatDetail.csv',parse_dates=True)
problemPlatDetail
# Per-platform data keyed by a 'month' column.
platData=pd.read_csv('platData.csv',parse_dates=True)
Image(filename='6月513家大中型平台全国分布.png', width=1024)
# Rows whose month is June 2017.
juneData=platData[platData['month']=='2017-06']
# Histogram and kernel density estimate of the June transaction amount.
juneData['amount'].hist(normed=True)
juneData['amount'].plot(kind='kde',style='k--')
# The same distribution after taking log10 of the amount.
np.log10(juneData['amount']).hist(normed=True)
np.log10(juneData['amount']).plot(kind='kde',style='k--')
# Inspect the June rows whose average bid amount exceeds 1000.
juneData[juneData['avgBidMoney']>1000]['avgBidMoney']
# Overwrite only the out-of-range avgBidMoney values. Without the column label,
# .loc[mask]=0.42 assigns 0.42 to EVERY column of the matching rows (month,
# amount, ...), which silently corrupts the month/amount analysis below.
# NOTE(review): 0.42 is the replacement value chosen by the original author;
# its origin is not visible here -- confirm.
platData.loc[platData['avgBidMoney']>1000,'avgBidMoney']=0.42
platData['avgBidMoney'].max()
# Distribution of the average bid amount.
platData['avgBidMoney'].hist()
# May 2017 transaction amount: histogram (30 bins) and kernel density estimate.
platData[platData['month']=='2017-05']['amount'].hist(bins=30,normed=True)
platData[platData['month']=='2017-05']['amount'].plot(kind='kde')
# The same May distribution on a log10 scale.
np.log10(platData[platData['month']=='2017-05']['amount']).hist(bins=30,normed=True)
np.log10(platData[platData['month']=='2017-05']['amount']).plot(kind='kde')
# Per-platform transaction volume by date.
platVolume=pd.read_csv('platVolume.csv',parse_dates=True)
platVolume['amount'].describe()
# Volume rows for platform id 59 (Lujinsuo, per the original comment and the plot labels).
data59=platVolume[platVolume['wdzjPlatId']==59][['date','amount']]
# Index by the 'date' column, converted to datetime.
data59.index=data59['date']
data59.index=pd.to_datetime(data59.index)
fig,ax=plt.subplots(ncols=1,nrows=1,figsize=(12,7))
# Raw volume and its 50-row rolling mean (needs at least 10 observations) on the same axes.
data59['amount'].plot(label='陆金所成交量',ax=ax)
data59['amount'].rolling(window=50,min_periods=10).mean().plot(label='陆金所成50日每日成交量均线',ax=ax)
plt.legend(loc='best')
# WeChat index data; keep the rows for the keyword 陆金所 (Lujinsuo), index them
# by date and sort chronologically.
weixinIndex=pd.read_excel('weixinIndex.xlsx')
lujinWeixinIndex=weixinIndex[weixinIndex['word']=='陆金所']
lujinWeixinIndex.index=lujinWeixinIndex['date']
lujinWeixinIndex.index=pd.to_datetime(lujinWeixinIndex['date'])
lujinWeixinIndex.sort_index(inplace=True)
lujinWeixinIndex
# Trend of the rolling correlation (window 10, all 10 observations required)
# between Lujinsuo's transaction amount and its WeChat index over
# 2017-05-12 .. 2017-07-03.
# pd.rolling_corr() was removed from pandas; Series.rolling(...).corr(other) is
# the equivalent call and is already used elsewhere in this script.
data59['amount']['2017-05-12':'2017-07-03'].rolling(window=10,min_periods=10).corr(lujinWeixinIndex['index']['2017-05-12':'2017-07-03']).plot(title='陆金所交易额与其微信指数相关系数变化趋势')
# Original author's reading of the plot: transaction amount and the WeChat
# index are clearly positively correlated.
# Trend analysis and forecast of Lujinsuo's transaction amount with Prophet.
from fbprophet import Prophet
# Copy the amount into 'y' and the date into 'ds' (converted to datetime);
# these are the column names handed to Prophet below.
data59['y']=data59['amount']
data59['ds']=data59['date']
data59['ds']=pd.to_datetime(data59['ds'])
# Keep only the first 363 rows for fitting.
data59=data59[0:363]
m=Prophet(yearly_seasonality=True)
m.fit(data59)
# Build a frame extending 365 periods beyond the fitted history and predict over it.
future=m.make_future_dataframe(periods=365)
future.tail()
forecast=m.predict(future)
# Plot the forecast and its components.
m.plot(forecast)
m.plot_components(forecast)
# Sum the volume data of all platforms per 'date' value.
daySumData=platVolume.groupby('date').sum()
# Prepare the 'y' (amount) and 'ds' (the group key, i.e. the date) columns for Prophet.
daySumData['y']=daySumData['amount']
daySumData['ds']=daySumData.index
daySumData
m=Prophet(weekly_seasonality=True)
# Trend analysis and forecast of the transaction amount across all platforms.
m.fit(daySumData)
# Predict over the history plus 365 further periods, then plot forecast and components.
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
# Total transaction amount over the whole volume table.
# NOTE(review): this message says 518 platforms while the plot title below says
# 513 -- confirm which count is correct.
print('所有518家平台一年内的成交总和',platVolume['amount'].sum(),'万元')
platVolume.index
# Re-index the volume table by its 'date' column as a datetime index.
platVolume.index=platVolume['date']
platVolume.index=pd.to_datetime(platVolume.index)
# Monthly total transaction amount, x axis limited to 2016-07 .. 2017-06.
platVolume.resample('M').sum()['amount'].plot(title='网贷之家513家平台月成交额变化趋势',xlim=('2016-07','2017-06'))
# Total transaction amount per platform over the period covered by the data.
platAmount=platVolume.groupby('wdzjPlatId').sum()
platAmount
# Histogram and KDE of log10(amount) per platform.
np.log10(platAmount['amount']).hist(bins=10,normed=True)
np.log10(platAmount['amount']).plot(kind='kde',style='k--',title='交易额的对数的直方图与概率分布')
# Histogram and KDE of the raw amount per platform.
platAmount['amount'].hist(bins=50,normed=True)
platAmount['amount'].plot(kind='kde',style='k--',title='交易额的直方图与概率分布')
# Expose the group key as a regular column so it can be used as a merge key later.
platAmount['wdzjPlatId']=platAmount.index.to_series()
# Daily totals across all platforms.
# resample(..., how='sum') and the pd.rolling_mean/rolling_std/rolling_corr
# functions were removed from pandas; the resampler/.rolling() forms below are
# the equivalents this script already uses elsewhere.
allPlatDayData=platVolume.resample('D').sum()
allPlatDayData['amount'].plot(title='全平台成交额变化趋势')
# 50-row rolling mean and standard deviation (at least 10 observations).
allPlatDayData['amount'].rolling(window=50,min_periods=10).mean().plot(title='全平台50日成交日均值线')
allPlatDayData['amount'].rolling(window=50,min_periods=10).std().plot(title='全平台50日每日成交额标准差')
# Rolling correlation (full 50 observations required) between platform 59
# (Lujinsuo) and the all-platform daily total. `corr` holds the value
# returned by .plot(), as in the original.
lujinData=platVolume[platVolume['wdzjPlatId']==59]
corr=lujinData['amount'].rolling(window=50,min_periods=50).corr(allPlatDayData['amount']).plot(title='陆金所交易额与所有平台交易额的相关系数变化趋势')
# Transaction data of the ten platform ids the original author lists as June's top ten.
topTenData=platVolume[platVolume['wdzjPlatId'].isin([59,60,38,505,91,144,223,52,85,942])]
topTenData=topTenData.resample('D').sum()
# Daily total of those ten platforms with its 50-row rolling mean.
topTenData['amount'].plot(title='前十平台成交总额变化趋势及均线')
topTenData['amount'].rolling(window=50,min_periods=10).mean().plot()
# Rolling correlation between the top-ten total and the all-platform total.
corr=topTenData['amount'].rolling(window=50,min_periods=50).corr(allPlatDayData['amount']).plot(title='前十平台交易额与总交易额相关系数变化趋势')
# Platform detail table; select the platforms whose businessType mentions 车贷 (car loans).
platDataDetail=pd.read_csv('platDataDetail.csv')
# `==True` drops rows where str.contains() yields NaN (missing businessType).
carFinancePlat=platDataDetail[platDataDetail['businessType'].str.contains('车贷')==True]
# Volume rows belonging to those car-loan platforms, and their daily total.
carFinanceData=platVolume[platVolume['wdzjPlatId'].isin(carFinancePlat['platId'].tolist())]
carFinanceDayData=carFinanceData.resample('D').sum()['amount']
# Side by side: car-loan platforms vs. all platforms, sharing the y axis.
fig,axes=plt.subplots(nrows=1,ncols=2,sharey=True,figsize=(14,7))
carFinanceDayData.plot(ax=axes[0],title='车贷平台交易额')
allPlatDayData['amount'].plot(ax=axes[1],title='所有p2p平台交易额')
# 50-row rolling mean / standard deviation / correlation with the all-platform
# total. pd.rolling_mean/rolling_std/rolling_corr were removed from pandas;
# Series.rolling(...) is the equivalent already used elsewhere in this script.
carFinanceDayData.rolling(window=50,min_periods=10).mean().plot(title='车贷平台50日每日交易额均线')
carFinanceDayData.rolling(window=50,min_periods=10).std().plot(title='车贷平台50日每日交易额标准差')
carFinanceDayData.rolling(window=50,min_periods=10).corr(allPlatDayData['amount']).plot(title='车贷平台交易额与所有平台交易额相关系数')
# Number of platforms doing car-loan business.
carFinanceData['wdzjPlatId'].unique().size
# Total amount per car-loan platform; histogram and KDE of its log10.
carPlat=carFinanceData.groupby('wdzjPlatId').sum()['amount']
np.log10(carPlat).hist(normed=True)
np.log10(carPlat).plot(kind='kde',style='k--',title='车贷平台销售额分布')
# Fit a 4-component Gaussian mixture to log10 of the per-platform car-loan amount.
# NOTE(review): sklearn.mixture.GMM was deprecated in scikit-learn 0.18 and
# removed in 0.20; porting to GaussianMixture changes several attribute and
# method names, so it is left for a separate, tested change.
from sklearn.mixture import GMM
from scipy import stats
# Column vector of log10 amounts (one feature per sample).
carPlatValue=np.log10(carPlat.values)[:,np.newaxis]
clf=GMM(4,n_iter=500).fit(carPlatValue)
# Evaluate the fitted density on 1000 evenly spaced points in [0, 10].
xpdf = np.linspace(0, 10, 1000)
density = np.exp(clf.score(xpdf[:, np.newaxis]))
plt.plot(xpdf, density, '-r')
# Overlay the empirical histogram and KDE of the same data.
np.log10(carPlat).hist(normed=True,alpha=0.3)
np.log10(carPlat).plot(kind='kde',style='k--',title='车贷平台销售额分布',figsize=(12,7))
# Shade each mixture component: weight * Normal(mean, sqrt(variance)).
# The loop body must be indented; as written in the original it was not,
# which is an IndentationError.
for i in range(clf.n_components):
    pdf = clf.weights_[i] * stats.norm(clf.means_[i, 0],
                                       np.sqrt(clf.covars_[i, 0])).pdf(xpdf)
    plt.fill(xpdf, pdf, facecolor='gray',
             edgecolor='none', alpha=0.3)
clf.means_  # mean of each mixture component
clf.covars_  # variance of each mixture component
clf.weights_  # weight of each mixture component
# Platforms whose businessType mentions 个人信贷 (personal credit) and their volume rows.
personalCreditPlat=platDataDetail[platDataDetail['businessType'].str.contains('个人信贷')==True]
personalCreditData=platVolume[platVolume['wdzjPlatId'].isin(personalCreditPlat['platId'].tolist())]
fig,axes=plt.subplots(nrows=2,ncols=1,figsize=(12,8))
# 50-row rolling mean of the daily total: personal-credit platforms (top) vs.
# all platforms (bottom). pd.rolling_mean() was removed from pandas;
# Series.rolling(...).mean() is the equivalent used elsewhere in this script.
personalCreditData.resample('D').sum()['amount'].rolling(window=50,min_periods=10).mean().plot(ax=axes[0],title='个人信贷平台销售额50日每日销售额均线')
allPlatDayData['amount'].rolling(window=50,min_periods=10).mean().plot(ax=axes[1],title='所有平台销售额50日每日销售额均线')
plt.subplots_adjust(hspace=0.5)
# Platforms whose businessType mentions 房贷 (mortgage loans) and their volume rows.
houseCreditPlat=platDataDetail[platDataDetail['businessType'].str.contains('房贷')==True]
houseCreditData=platVolume[platVolume['wdzjPlatId'].isin(houseCreditPlat['platId'].tolist())]
# 50-row rolling mean of the daily total: mortgage platforms vs. all platforms.
houseCreditData.resample('D').sum()['amount'].rolling(window=50,min_periods=10,center=False).mean().plot(label='房贷平台50日每日成交额均线')
allPlatDayData['amount'].rolling(window=50,min_periods=10,center=False).mean().plot(label='全平台50日每日成交额均线')
plt.legend(loc='best')
# Attach each platform's total amount to the detail table, then attach the
# platform's area name to every volume row.
platDataDetail=pd.merge(platDataDetail,platAmount,left_on='platId',right_on='wdzjPlatId')
areaData=pd.merge(platVolume,platDataDetail[['locationAreaName','platId']],left_on='wdzjPlatId',right_on='platId')
# The merge discards the datetime index, so rebuild it from the 'date' column.
areaData.index=areaData['date']
areaData.index=pd.to_datetime(areaData.index)
# Rolling mean of the daily total for Shanghai, Beijing and Guangdong.
# The window is 50 to match the "50日" wording in the labels and the 50-row
# window used by every other moving average in this script; the original
# used window=40 here, contradicting its own labels.
areaData[areaData['locationAreaName']=='上海'].resample('D').sum()['amount'].rolling(window=50,min_periods=10).mean().plot(label='上海地区P2P平台50日每日成交额均线',figsize=(12,7))
areaData[areaData['locationAreaName']=='北京'].resample('D').sum()['amount'].rolling(window=50,min_periods=10).mean().plot(label='北京地区P2p平台50日每日成交额均线')
areaData[areaData['locationAreaName']=='广东'].resample('D').sum()['amount'].rolling(window=50,min_periods=10).mean().plot(label='广东地区p2p平台50日每日成交额均线')
plt.legend(loc='best')
plt.title('三大地区成交额走势对比')
Image(filename='全年成交额全国各省对比.png', width=1024)
# Income-rate data per platform and date, indexed by date.
incomeRateData=pd.read_csv('platIncomeRate.csv',parse_dates=True)
incomeRateData.index=incomeRateData['date']
incomeRateData.index=pd.to_datetime(incomeRateData.index)
# Lujinsuo (platform 59): income rate and its 50-row rolling mean.
incomeRateData[incomeRateData['wdzjPlatId']==59]['incomeRate'].plot(label='陆金所收益率变化趋势')
incomeRateData[incomeRateData['wdzjPlatId']==59]['incomeRate'].rolling(window=50,min_periods=10,center=False).mean().plot(label='陆金所收益率50日均线')
plt.legend(loc='best')
# Lujinsuo: transaction amount and its 50-row rolling mean.
platVolume[platVolume['wdzjPlatId']==59]['amount'].plot(label='陆金所交易额变化趋势')
platVolume[platVolume['wdzjPlatId']==59]['amount'].rolling(window=50,min_periods=10,center=False).mean().plot(label='陆金所交易额50日每日均线')
plt.legend(loc='best')
# Rolling correlation between Lujinsuo's amount and its income rate.
lujinData['amount'].rolling(window=50,min_periods=10).corr(incomeRateData[incomeRateData['wdzjPlatId']==59]['incomeRate']).plot(title='陆金所成交额与收益率相关系数')
# Rolling correlation (window 10) between Lujinsuo's income rate and its WeChat
# index over 2017-05-12 .. 2017-07-03. pd.rolling_corr() was removed from
# pandas; this is the same Series.rolling(...).corr() form as the line above.
incomeRateData[incomeRateData['wdzjPlatId']==59]['incomeRate']['2017-05-12':'2017-07-03'].rolling(window=10,min_periods=10).corr(lujinWeixinIndex['index']['2017-05-12':'2017-07-03']).plot(title='陆金所收益率与其微信指数相关系数变化趋势')
# Join income rate and volume on (platform id, date).
amount_incomeRate=pd.merge(incomeRateData,platVolume,on=['wdzjPlatId','date'])
# Use the date column as a datetime index.
amount_incomeRate.index=amount_incomeRate.date
amount_incomeRate.index=pd.to_datetime(amount_incomeRate.index)
# Drop rows with zero amount so the weights of a weighted average never sum to zero.
amount_incomeRate=amount_incomeRate[amount_incomeRate['amount']!=0]
grouped=amount_incomeRate.groupby('date')
grouped['amount']
# Amount-weighted average income rate of one group.
get_wavg=lambda g:np.average(g['incomeRate'],weights=g['amount'])
dayData=grouped.apply(get_wavg)
dayData# amount-weighted income rate per date
dayData.index=pd.to_datetime(dayData.index)
# Weighted income rate and its 50-row rolling mean.
dayData.plot(label='所有平台的平均收益率')
dayData.rolling(window=50,min_periods=10).mean().plot(label='所有网贷平台50日每日收益率均线')
plt.legend(loc='best')
import tushare as ts
# NOTE(review): the original comment called this the deposit rate, but the
# function name is get_loan_rate -- confirm which was intended.
ts.get_loan_rate()
# Rolling correlation between the all-platform amount and the weighted income
# rate, plus a 50-row rolling mean of that correlation.
allPlatDayData['amount'].rolling(window=50,min_periods=10).corr(dayData).plot(label='所有平台交易额与收益率相关系数')
allPlatDayData['amount'].rolling(window=50,min_periods=10).corr(dayData).rolling(window=50,min_periods=10).mean().plot(label='均线')
plt.legend(loc='best')
# Amount-weighted average income rate per platform.
grouped=amount_incomeRate.groupby('wdzjPlatId')
get_wavg=lambda g:np.average(g['incomeRate'],weights=g['amount'])
# Rebinds platData, replacing the frame read from platData.csv earlier.
platData=grouped.apply(get_wavg)
# NOTE(review): if apply() returned a Series here, rename(columns=...) may not
# be accepted by pandas -- confirm the type of platData at this point.
platData.rename(columns={0:'incomeRate'},inplace=True)
# Histogram and KDE of the per-platform income rate.
platData.hist(normed=True,label='不同平台收益率分布直方图')
platData.plot(kind='kde',style='k--',label='收益率分布概率密度函数')
plt.legend(loc='best')
# Number of platforms per area, ascending, as a bar chart.
fig,axes=plt.subplots(nrows=1,ncols=1,figsize=(14,7))
platDataDetail.groupby('locationAreaName').count()['platId'].sort_values().plot(kind='bar',sort_columns=True,label='_nolegend_')
plt.legend(loc='best')
plt.xticks(rotation=40)
plt.xlabel('区域')
plt.ylabel('数量')
from pandas import DataFrame
# Turn the per-platform income rate into a frame with the platform id as a
# column, merge it into the detail table and name the value column 'incomeRate'.
platData=DataFrame(platData)
platData['wdzjPlatId']=platData.index.to_series()
platDataDetail=pd.merge(platDataDetail,platData,left_on='platId',right_on='wdzjPlatId')
platDataDetail.rename(columns={0:'incomeRate'},inplace=True)
# Scatter of amount against income rate, raw and with log10(amount).
plt.scatter(x=platDataDetail['incomeRate'],y=platDataDetail['amount'],label='成交量与收益率散点图')
plt.legend(loc='best')
plt.scatter(x=platDataDetail['incomeRate'],y=np.log10(platDataDetail['amount']),label='成交量对数与收益率散点图')
plt.legend(loc='best')
# Original author's reading: after taking the log the points are still
# scattered, but there is some negative correlation;
# large platforms all tend to have lower income rates.
# Total amount per area, ascending, as a bar chart.
fig,axes=plt.subplots(nrows=1,ncols=1,figsize=(14,7))
platDataDetail.groupby('locationAreaName').sum()['amount'].sort_values().plot(kind='bar',sort_columns=True,label='_nolegend_')
plt.legend(loc='best')
plt.xticks(rotation=40)
plt.xlabel('区域(单位:万元)')
plt.ylabel('成交额')
platDataDetail
# Amount-weighted income rate of the platforms within each area.
grouped=platDataDetail.groupby('locationAreaName')
get_wavg=lambda g:np.average(g['incomeRate'],weights=g['amount'])
areaIncomeRate=grouped.apply(get_wavg)
areaIncomeRate.sort_values().plot(kind='bar',figsize=(16,10),label='_nolegend_')
plt.xticks(rotation=40)
plt.xlabel('区域')
plt.ylabel('收益率')
# Build the Prophet input from the weighted daily income rate:
# 'y' is the value and 'ds' is the datetime taken from the index.
dayIncomeRateData=DataFrame(dayData)
dayIncomeRateData.rename(columns={0:'incomeRateData'},inplace=True)
dayIncomeRateData['y']=dayIncomeRateData['incomeRateData']
dayIncomeRateData['ds']=pd.to_datetime(dayIncomeRateData.index)
# Income-rate trend forecast.
from fbprophet import Prophet
m=Prophet(yearly_seasonality=True)
m.fit(dayIncomeRateData)
# Predict over the history plus 365 further periods; plot forecast and components.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
m.plot(forecast)
m.plot_components(forecast);
# Investor counts split into grades, indexed by date.
investorGradeData=pd.read_csv('platInvestorGrade.csv')
investorGradeData.index=investorGradeData['date']
investorGradeData.index=pd.to_datetime(investorGradeData.index)
# Total investors = sum of the four grade columns.
investorGradeData['total']=investorGradeData['qianPerson']+investorGradeData['wanPerson']+investorGradeData['shiWanPerson']+investorGradeData['baiWanPerson']
# Monthly sums for 2016-06 .. 2017-05: total and each grade on one figure.
# (The same monthly resample is recomputed on every line.)
investorGradeData.resample('M').sum()['2016-06':'2017-05']['total'].plot(label='全平台投资人数变化趋势',figsize=(12,7))
investorGradeData.resample('M').sum()['2016-06':'2017-05']['qianPerson'].plot(label='全平台一万投资额以内投资人数变化趋势')
investorGradeData.resample('M').sum()['2016-06':'2017-05']['wanPerson'].plot(label='全平台投资额一万到十万投资人数变化趋势')
investorGradeData.resample('M').sum()['2016-06':'2017-05']['shiWanPerson'].plot(label='全平台投资额十万到百万投资人数变化趋势')
investorGradeData.resample('M').sum()['2016-06':'2017-05']['baiWanPerson'].plot(label='全平台投资额百万以上投资人数变化趋势')
plt.legend(loc='best')
# Share of each grade in the monthly total.
(investorGradeData.resample('M').sum()['2016-06':'2017-05']['qianPerson']/investorGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台一万投资额以内投资人数占比变化趋势',figsize=(12,8))
(investorGradeData.resample('M').sum()['2016-06':'2017-05']['wanPerson']/investorGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台投资额一万到十万投资人数占比变化趋势')
(investorGradeData.resample('M').sum()['2016-06':'2017-05']['shiWanPerson']/investorGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台投资额十万到百万投资人数占比变化趋势')
(investorGradeData.resample('M').sum()['2016-06':'2017-05']['baiWanPerson']/investorGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台投资额百万以上投资人数占比变化趋势')
plt.legend(loc='lower center')
# Distribution of the per-platform mean investor total: histogram and KDE.
investorGradeData['2016-06':'2017-05'].groupby('wdzjPlatId')['total'].mean().hist(label='平台按投资人数规模分布',normed=True)
investorGradeData['2016-06':'2017-05'].groupby('wdzjPlatId')['total'].mean().plot(label='平台按投资人数概率分布',kind='kde')
plt.legend(loc='best')
# The same distribution on a log10 scale.
np.log10(investorGradeData['2016-06':'2017-05'].groupby('wdzjPlatId')['total'].mean()).hist(label='平台按投资人数对数规模分布',normed=True,figsize=(12,7))
np.log10(investorGradeData['2016-06':'2017-05'].groupby('wdzjPlatId')['total'].mean()).plot(label='平台按投资人数对数概率分布',kind='kde')
plt.legend(loc='upper left')
# Per-platform mean of the investor grade columns over 2016-06 .. 2017-05.
platInvestorGradeData=investorGradeData['2016-06':'2017-05'].groupby('wdzjPlatId').mean()
platInvestorGradeData=DataFrame(platInvestorGradeData)
platInvestorGradeData['wdzjPlatId']=platInvestorGradeData.index.to_series()
# Positional rename with an "invest" prefix.
# NOTE(review): this assumes the frame has exactly these seven columns in this
# order -- confirm against platInvestorGrade.csv.
platInvestorGradeData.columns=['id','investQianPerson','investWanPerson','investShiWanPerson','investBaiWanPerson','investTotal','wdzjPlatId']
# Merge the investor means into the platform detail table.
platDataDetail=pd.merge(platDataDetail,platInvestorGradeData[['investQianPerson','investWanPerson','investShiWanPerson',
'investBaiWanPerson','investTotal','wdzjPlatId']],left_on='platId',right_on='wdzjPlatId')
platDataDetail
# Share of investors on pure car-loan platforms ("车贷(100%)") among all investors.
# The pattern is a regular expression, so it is written as a raw string: in a
# normal string "\(" and "\%" are invalid escape sequences (a warning today, an
# error in future Python versions). The runtime pattern is unchanged.
platDataDetail[platDataDetail['businessType'].str.contains(r'车贷\(100\%\)')==True]['investTotal'].sum()/platInvestorGradeData['investTotal'].sum()
# Share of investors on platforms involving personal credit.
platDataDetail[platDataDetail['businessType'].str.contains('个人信贷')==True]['investTotal'].sum()/platInvestorGradeData['investTotal'].sum()
# Share of investors on platforms involving consumer loans.
platDataDetail[platDataDetail['businessType'].str.contains('消费')==True]['investTotal'].sum()/platInvestorGradeData['investTotal'].sum()
# Share of investors on platforms whose bankCapital field mentions a bank
# (i.e. bank custody is enabled, per the original comment).
platDataDetail[platDataDetail['bankCapital'].str.contains('银行')==True]['investTotal'].sum()/platInvestorGradeData['investTotal'].sum()
# Twin-axis figure: monthly mean income rate (left axis, green) against the
# monthly investor total (right axis), 2016-07 .. 2017-05.
fig = plt.figure()
ax1 = fig.add_subplot(111)
# Second y axis sharing the same x axis.
ax2=ax1.twinx()
dayData.resample('M').mean()['2016-07':'2017-05'].plot(ax=ax1,color='green')
ax1.set_ylabel('收益率变化趋势',rotation=90)
investorGradeData.resample('M').sum()['2016-07':'2017-05']['total'].plot(ax=ax2)
ax2.set_ylabel('投资人数变化趋势')
plt.legend(loc='upper left')
# Borrower counts split into grades, indexed by date.
# NOTE(review): several labels/titles below say 投资人数 (investors) although the
# data plotted is borrower counts -- confirm the intended wording.
borrowerGradeData=pd.read_csv('platBorrowerGrade.csv')
borrowerGradeData.index=borrowerGradeData['date']
borrowerGradeData.index=pd.to_datetime(borrowerGradeData.index)
# Total borrowers = sum of the four grade columns.
borrowerGradeData['total']=borrowerGradeData['wanPerson']+borrowerGradeData['shiWanPerson']+borrowerGradeData['baiWanPerson']+borrowerGradeData['qianWanPerson']
# Monthly borrower total, 2016-06 .. 2017-05.
borrowerGradeData.resample('M').sum()['total']['2016-06':'2017-05'].plot(title='借款人数变化趋势',figsize=(10,7))
# Distribution of the total over the rows dated 2017-05-31: histogram and KDE.
borrowerGradeData[borrowerGradeData['date']=='2017-05-31']['total'].hist(normed=True,label='五月平台借款人数直方图')
borrowerGradeData[borrowerGradeData['date']=='2017-05-31']['total'].plot(kind='kde',style='k--',label='五月平台借款人数概率分布')
plt.legend(loc='best')
# The same distribution as log10(total+1); the +1 keeps zero totals finite.
np.log10(borrowerGradeData[borrowerGradeData['date']=='2017-05-31']['total']+1).hist(normed=True,label='五月平台借款人数对数直方图',figsize=(12,7))
np.log10(borrowerGradeData[borrowerGradeData['date']=='2017-05-31']['total']+1).plot(kind='kde',style='k--',label='五月平台借款人数对数概率分布')
plt.legend(loc='upper left')
# Share of each grade in the monthly borrower total.
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['qianWanPerson']/borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台千万借款额以内投资人数占比变化趋势',figsize=(12,8))
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['wanPerson']/borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台借款额一万到十万投资人数占比变化趋势')
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['shiWanPerson']/borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台借款额十万到百万投资人数占比变化趋势')
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['baiWanPerson']/borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['total']).plot(label='全平台借款额百万以上投资人数占比变化趋势')
plt.legend(loc='center')
# One subplot per grade with the monthly counts.
fig,axes=plt.subplots(nrows=4,ncols=1,figsize=(12,40))
borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['wanPerson'].plot(title='全平台借款额一万到十万投资人数变化趋势',ax=axes[0])
borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['shiWanPerson'].plot(title='全平台借款额十万到百万投资人数变化趋势',ax=axes[1])
borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['baiWanPerson'].plot(title='全平台借款额百万以上投资人数变化趋势',ax=axes[2])
borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['qianWanPerson'].plot(title='全平台千万借款额以内投资人数变化趋势',ax=axes[3])
plt.legend(loc='best')
plt.subplots_adjust(hspace=1)
# All four grades' monthly counts drawn together.
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['qianWanPerson']).plot(label='全平台千万借款额以内投资人数变化趋势',figsize=(12,8))
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['wanPerson']).plot(label='全平台借款额一万到十万投资人数变化趋势')
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['shiWanPerson']).plot(label='全平台借款额十万到百万投资人数变化趋势')
(borrowerGradeData.resample('M').sum()['2016-06':'2017-05']['baiWanPerson']).plot(label='全平台借款额百万以上投资人数变化趋势')
plt.legend(loc='center')
# Per-platform mean of the borrower grade columns.
platBorrowerData=borrowerGradeData.groupby('wdzjPlatId').mean()
platBorrowerData['wdzjPlatId']=platBorrowerData.index.to_series()
# Positional rename with a "borrow" prefix.
# NOTE(review): this assumes the frame has exactly these seven columns in this
# order -- confirm against platBorrowerGrade.csv.
platBorrowerData.columns=['id','borrowWanPerson','borrowShiWanPerson','borrowBaiWanPerson','borrowQianWanPerson','borrowTotal','wdzjPlatId']
# Merge the borrower means into the platform detail table.
platDataDetail=pd.merge(platDataDetail,platBorrowerData[['borrowWanPerson','borrowShiWanPerson','borrowBaiWanPerson','borrowQianWanPerson','borrowTotal','wdzjPlatId']],
left_on='platId',right_on='wdzjPlatId')
# Ratio of investors to borrowers per platform.
platDataDetail['invest/borrow']=platDataDetail['investTotal']/platDataDetail['borrowTotal']
platDataDetail['invest/borrow'].sort_values()
# Distribution of the ratio: raw, then on a log10 scale.
platDataDetail['invest/borrow'].hist(normed=True)
platDataDetail['invest/borrow'].plot(kind='kde',style='k--',title='投资人数与借款人数比概率分布')
np.log10(platDataDetail['invest/borrow']).hist(normed=True)
np.log10(platDataDetail['invest/borrow']).plot(kind='kde',style='k--',title='投资人数与借款人数比对数概率分布')
# Scatter: amount against the investor/borrower ratio.
plt.scatter(y=platDataDetail['amount'],x=platDataDetail['invest/borrow'])
plt.title('成交量与投资与借款人数比散点图')
# Scatter: log10(amount) against log10(ratio).
plt.scatter(y=np.log10(platDataDetail['amount']),x=np.log10(platDataDetail['invest/borrow']))
plt.title('成交量对数与投资与借款人数比对数散点图')
# Scatter: raw amount against log10(ratio). The title now says 成交量 rather than
# 成交量对数, because y is the raw amount here; the original reused the
# log-log title from the previous plot.
plt.scatter(y=platDataDetail['amount'],x=np.log10(platDataDetail['invest/borrow']))
plt.title('成交量与投资与借款人数比对数散点图')
# Income rate of loans broken down by term.
incomeRateOnPeriod=pd.read_csv('platIncomeRateByPeriod.csv',parse_dates=True)
# Standard deviation across the five term columns of one row.
get_std=lambda g:np.std([g['day'],g['oneMonth'],g['twoMonth'],g['threeMonth'],g['sixMonth']])
# axis=1 applies the function row by row.
incomeRateOnPeriod['std']=incomeRateOnPeriod.apply(get_std,axis=1)
# Distribution of that per-row standard deviation over the rows dated 2017-05-31.
incomeRateOnPeriod[incomeRateOnPeriod['date']=='2017-05-31']['std'].hist(normed=True,label='5月全平台不同期限标的收益率标准差分布直方图',figsize=(12,7))
incomeRateOnPeriod[incomeRateOnPeriod['date']=='2017-05-31']['std'].plot(kind='kde',label='5月全平台不同期限标的收益率标准差概率分布')
plt.legend(loc='best')
# Index by date for the time-series plots below.
incomeRateOnPeriod.index=incomeRateOnPeriod['date']
incomeRateOnPeriod.index=pd.to_datetime(incomeRateOnPeriod.index)
# Per-term income rates over 2016-06 .. 2017-05 for platform 59 (Lujinsuo, per
# the title) and platform 85 (Yirendai, per the title).
incomeRateOnPeriod[incomeRateOnPeriod['wdzjPlatId']==59][['day','oneMonth','twoMonth','threeMonth','sixMonth']]['2016-06':'2017-05'].plot(title='陆金所各种期限标的收益率变化趋势')
incomeRateOnPeriod[incomeRateOnPeriod['wdzjPlatId']==85][['day','oneMonth','twoMonth','threeMonth','sixMonth']]['2016-06':'2017-05'].plot(title='宜人贷各种期限标的收益率变化趋势')
# The Lujinsuo data again, as a bar chart.
incomeRateOnPeriod[incomeRateOnPeriod['wdzjPlatId']==59][['day','oneMonth','twoMonth','threeMonth','sixMonth']]['2016-06':'2017-05'].plot(kind='bar',title='陆金所各种期限标的收益率变化趋势',
figsize=(12,7))
plt.xticks(rotation=40)
# Per-platform mean of the term income rates and of their standard deviation.
platIncomeRateOnPeriod=incomeRateOnPeriod.groupby('wdzjPlatId').mean()
platIncomeRateOnPeriod['wdzjPlatId']=platIncomeRateOnPeriod.index.to_series()
# Positional rename.
# NOTE(review): assumes exactly these eight columns in this order -- confirm.
platIncomeRateOnPeriod.columns=['id','dayIncomeRate','oneMonthIncomeRate','twoMonthIncomeRate','threeMonthIncomeRate',
'sixMonthIncomeRate','incomeRateStd','wdzjPlatId']
# Merge the per-platform term income rates into the detail table.
platDataDetail=pd.merge(platDataDetail,platIncomeRateOnPeriod[['dayIncomeRate','oneMonthIncomeRate','twoMonthIncomeRate','threeMonthIncomeRate',
'sixMonthIncomeRate','incomeRateStd','wdzjPlatId']],left_on='platId',right_on='wdzjPlatId')
platDataDetail
platDataDetail['incomeRateStd'].sort_values()
# Time to fill a loan, broken down by term.
fullTimeOnPeriod=pd.read_csv('platFullTimeByPeriod.csv',parse_dates=True)
# Per the original author: because much data is missing (i.e. there are many
# zeros), the per-platform maximum is used as the platform's reference value.
platFullTime=fullTimeOnPeriod.groupby('wdzjPlatId').max()
platFullTime
# Investor count vs. borrower count per platform and date, indexed by date.
invNumVSBorNum=pd.read_csv('platInvNumVsBorNum.csv',parse_dates=True)
invNumVSBorNum.index=invNumVSBorNum['date']
invNumVSBorNum.index=pd.to_datetime(invNumVSBorNum.index)
# Platform 59 (Lujinsuo): both counts, then the borrower count alone.
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59][['invNum','borNum']].plot(title='陆金所投资人数借款人数变化趋势')
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59][['borNum']].plot(title='陆金所投资人数借款人数变化趋势')
# 50-row rolling means of both counts and their rolling correlation.
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59][['invNum','borNum']].rolling(window=50,min_periods=10).mean().plot(title='陆金所投资人数借款人数50日每日均线')
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['invNum'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['borNum']).plot(title='陆金所投资人数借款人数相关系数')
# Rolling correlation (window 10) of each count with the WeChat index over
# 2017-05-12 .. 2017-07-03.
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['invNum']['2017-05-12':'2017-07-03'].rolling(window=10,min_periods=10).corr(lujinWeixinIndex['index']['2017-05-12':'2017-07-03']).plot(title='陆金所投资人数与微信指数相关系数')
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['borNum']['2017-05-12':'2017-07-03'].rolling(window=10,min_periods=10).corr(lujinWeixinIndex['index']['2017-05-12':'2017-07-03']).plot(title='陆金所借款人数与微信指数相关系数')
# Platform 60 (Hongling Capital, per the titles): rolling means and correlation.
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60][['invNum','borNum']].rolling(window=50,min_periods=10).mean().plot(title='红岭创投投资人数借款人数50日每日均线')
invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60]['invNum'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60]['borNum']).plot(title='红岭创投投资人数借款人数相关系数')
# Lujinsuo: rolling correlation of amount with investor count, and the 50-row
# rolling mean of that correlation.
platVolume[platVolume['wdzjPlatId']==59]['amount'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['invNum']).plot(label='陆金所交易额与投资人数相关系数',figsize=(12,7))
platVolume[platVolume['wdzjPlatId']==59]['amount'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['invNum']).rolling(window=50,min_periods=10).mean().plot(label='陆金所交易额与投资人数相关系数50日每日均线',figsize=(12,7))
plt.legend(loc='best')
# Restrict the income-rate data to 2016-07-05 .. 2017-07-02 so it aligns with
# the investor-count slices used below.
incomeRateData=incomeRateData['2016-07-05':'2017-07-02']
# Lujinsuo (59): rolling correlation of income rate with investor count, plus
# the 50-row rolling mean of that correlation.
incomeRateData[incomeRateData['wdzjPlatId']==59]['incomeRate'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['invNum']['2016-07-05':'2017-07-02']).plot(label='陆金所收益率与投资人数相关系数',figsize=(12,7))
incomeRateData[incomeRateData['wdzjPlatId']==59]['incomeRate'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==59]['invNum']['2016-07-05':'2017-07-02']).rolling(window=50,min_periods=10).mean().plot(label='陆金所收益率与投资人数相关系数50日每日均线',figsize=(12,7))
plt.legend(loc='best')
# Hongling Capital (60): amount vs. investor count.
platVolume[platVolume['wdzjPlatId']==60]['amount'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60]['invNum']).plot(label='红岭创投交易额与投资人数相关系数',figsize=(12,7))
platVolume[platVolume['wdzjPlatId']==60]['amount'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60]['invNum']).rolling(window=50,min_periods=10).mean().plot(label='红岭创投交易额与投资人数相关系数50日每日均线',figsize=(12,7))
plt.legend(loc='best')
# Hongling Capital (60): income rate vs. investor count.
incomeRateData[incomeRateData['wdzjPlatId']==60]['incomeRate'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60]['invNum']['2016-07-05':'2017-07-02']).plot(label='红岭创投收益率与投资人数相关系数',figsize=(12,7))
incomeRateData[incomeRateData['wdzjPlatId']==60]['incomeRate'].rolling(window=50,min_periods=10).corr(invNumVSBorNum[invNumVSBorNum['wdzjPlatId']==60]['invNum']['2016-07-05':'2017-07-02']).rolling(window=50,min_periods=10).mean().plot(label='红岭创投收益率与投资人数相关系数50日每日均线',figsize=(12,7))
plt.legend(loc='best')
# All platforms: daily amount vs. daily investor count.
platVolume.resample('D').sum()['amount'].rolling(window=50,min_periods=10).corr(invNumVSBorNum.resample('D').sum()['invNum']).plot(label='全平台交易额与投资人数相关系数',figsize=(12,7))
plt.legend(loc='best')
# Turn the weighted daily income rate into a frame with an 'incomeRate' column.
dayData=DataFrame(dayData)
dayData.rename(columns={0:'incomeRate'},inplace=True)
# All platforms: daily investor count vs. weighted income rate.
invNumVSBorNum.resample('D').sum()['invNum']['2016-07-05':'2017-07-03'].rolling(window=50,min_periods=10).corr(dayData['2016-07-05':'2017-07-03']['incomeRate']).plot(label='全平台投资人数与收益率相关系数',figsize=(12,7))
plt.legend(loc='best')
# All platforms: daily investor and borrower counts, their 50-row rolling means
# and their rolling correlation.
invNumVSBorNum.resample('D').sum()[['invNum','borNum']].plot(title='全平台投资人数与借款人数变化趋势',figsize=(12,7))
invNumVSBorNum.resample('D').sum()['invNum'].rolling(window=50,min_periods=10).mean().plot(label='全平台投资人数50日每日均线',figsize=(12,7))
invNumVSBorNum.resample('D').sum()['borNum'].rolling(window=50,min_periods=10).mean().plot(label='全平台借款人数50日每日均线')
plt.legend(loc='best')
invNumVSBorNum.resample('D').sum()['invNum'].rolling(window=50,min_periods=10).corr(invNumVSBorNum.resample('D').sum()['borNum']).plot(label='全平台投资人数与借款人数相关系数',figsize=(12,7))
plt.legend(loc='best')
# Per-platform mean investor and borrower counts, with the platform id exposed as a column.
platInvNumVSBorNum=invNumVSBorNum.groupby('wdzjPlatId').mean()
platInvNumVSBorNum['wdzjPlatId']=platInvNumVSBorNum.index.to_series()
# Distribution of log10(mean investor count): histogram and KDE.
np.log10(platInvNumVSBorNum['invNum']).hist(label='投资人数对数分布直方图',figsize=(12,7),normed=True)
np.log10(platInvNumVSBorNum['invNum']).plot(kind='kde',label='投资人数对数概率分布图')
plt.legend(loc='best')# original author's reading: investor counts concentrate around the hundreds
# Distribution of log10(mean borrower count): histogram and KDE.
np.log10(platInvNumVSBorNum['borNum']).hist(label='借款人数对数分布直方图',figsize=(12,7),normed=True)
np.log10(platInvNumVSBorNum['borNum']).plot(kind='kde',label='借款人数对数概率分布图')
plt.legend(loc='best')# original author's reading: mean daily borrower counts concentrate around the tens
# Merge both means into the platform detail table.
platDataDetail=pd.merge(platDataDetail,platInvNumVSBorNum[['invNum','borNum','wdzjPlatId']],left_on='platId',right_on='wdzjPlatId')
platDataDetail
# Mean investment per investor vs. mean loan per borrower (unit: 10,000 yuan,
# per the original comment), indexed by date.
invMeanVSBorMean=pd.read_csv('platInvMeanVsBorMean.csv',parse_dates=True)
invMeanVSBorMean.index=invMeanVSBorMean['date']
invMeanVSBorMean.index=pd.to_datetime(invMeanVSBorMean.index)
# Lujinsuo (59): both series and their 50-row rolling means.
invMeanVSBorMean[invMeanVSBorMean['wdzjPlatId']==59][['invMean','borMean']].plot(title='陆金所人均投资额与人均借款额',figsize=(12,7))
invMeanVSBorMean[invMeanVSBorMean['wdzjPlatId']==59]['invMean'].rolling(window=50,min_periods=10).mean().plot(label='陆金所50日每日人均投资额均线')
invMeanVSBorMean[invMeanVSBorMean['wdzjPlatId']==59]['borMean'].rolling(window=50,min_periods=10).mean().plot(label='陆金所50日每日人均借款额均线')
plt.legend(loc='best')
# Per-platform means of both series, with the platform id exposed as a column.
platInvMeanVSBorMean=invMeanVSBorMean.groupby('wdzjPlatId').mean()[['invMean','borMean']]
platInvMeanVSBorMean['wdzjPlatId']=platInvMeanVSBorMean.index.to_series()
# log10 distributions of the mean investment, the mean loan and their ratio.
np.log10(platInvMeanVSBorMean['invMean']).hist(normed=True,figsize=(12,7))
np.log10(platInvMeanVSBorMean['invMean']).plot(kind='kde',style='k--',title='人均投资额取对数分布情况')
np.log10(platInvMeanVSBorMean['borMean']).hist(normed=True,figsize=(12,7))
np.log10(platInvMeanVSBorMean['borMean']).plot(kind='kde',style='k--',title='人均借款额取对数分布情况')
np.log10(platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']).hist(normed=True)
np.log10(platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']).plot(kind='kde',title='借款投资比对数分布情况',figsize=(12,7))
# Largest and smallest loan/investment ratio.
print('借款投资比最大为:',(platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']).max())
print('借款投资比最小为:',(platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']).min())
invBorRatio=platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']
# Look up which platform has the largest ratio, then show the amount of
# platform 827 (hard-coded; presumably the platform found on the previous line).
invBorRatio[invBorRatio==(platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']).max()]
platDataDetail[platDataDetail['platId']==827]['amount']
Image(filename='1.png', width=500)
# Likewise for the smallest ratio and platform 129 (Dianrong, per the titles below).
invBorRatio[invBorRatio==(platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']).min()]
platDataDetail[platDataDetail['platId']==129]['amount']
platDataDetail[platDataDetail['platId']==129]
invMeanVSBorMean[invMeanVSBorMean['wdzjPlatId']==129]['invMean'].plot(title='点融网投资均值变化')
invMeanVSBorMean[invMeanVSBorMean['wdzjPlatId']==129]['borMean'].plot(title='点融网借款均值变化')
# Store the ratio as a column and merge everything into the detail table.
platInvMeanVSBorMean['borMean/invMean']=platInvMeanVSBorMean['borMean']/platInvMeanVSBorMean['invMean']
platDataDetail=pd.merge(platDataDetail,platInvMeanVSBorMean,left_on='platId',right_on='wdzjPlatId')
# log10 distribution of the ratio for platforms whose businessType mentions
# 企业 (enterprise loans), then for those mentioning 房贷 (mortgage loans).
np.log10(platDataDetail[platDataDetail['businessType'].str.contains('企业')==True]['borMean/invMean']).hist(figsize=(12,7))
np.log10(platDataDetail[platDataDetail['businessType'].str.contains('房贷')==True]['borMean/invMean']).hist(figsize=(12,7))
# New vs. returning investor counts per platform and date, indexed by date.
newNumVsOldNum=pd.read_csv('platNewNumVSOldNum.csv',parse_dates=True)
newNumVsOldNum.index=newNumVsOldNum['date']
newNumVsOldNum.index=pd.to_datetime(newNumVsOldNum.index)
# Lujinsuo (59): both counts and their 50-row rolling means.
newNumVsOldNum[newNumVsOldNum['wdzjPlatId']==59][['newNum','oldNum']].plot(title='陆金所新老投资人数变化趋势',figsize=(12,7))
newNumVsOldNum[newNumVsOldNum['wdzjPlatId']==59]['newNum'].rolling(window=50,min_periods=10).mean().plot(label='陆金所50日每日新投资人数均线')
newNumVsOldNum[newNumVsOldNum['wdzjPlatId']==59]['oldNum'].rolling(window=50,min_periods=10).mean().plot(label='陆金所50日每日老投资人数均线')
plt.legend(loc='best')
# All platforms: daily totals and their 50-row rolling means.
newNumVsOldNum.resample('D').sum()[['newNum','oldNum']].plot(title='全平台新老投资人数变化趋势',figsize=(12,7))
newNumVsOldNum.resample('D').sum()['newNum'].rolling(window=50,min_periods=10).mean().plot(label='全平台50日每日新投资人数均线',figsize=(12,7))
newNumVsOldNum.resample('D').sum()['oldNum'].rolling(window=50,min_periods=10).mean().plot(label='全平台50日每日老投资人数均线')
plt.legend(loc='best')
# Rolling correlation between new and returning investors, and of each with
# the weighted income rate over 2016-07-05 .. 2017-07-02.
newNumVsOldNum.resample('D').sum()['newNum'].rolling(window=50,min_periods=10).corr(newNumVsOldNum.resample('D').sum()['oldNum']).plot(title='新老投资人数相关系数',figsize=(12,7))
newNumVsOldNum.resample('D').sum()['newNum']['2016-07-05':'2017-07-02'].rolling(window=50,min_periods=10).corr(dayData['incomeRate']['2016-07-05':'2017-07-02']).plot(title='新投资人数与收益率相关系数',figsize=(12,7))
newNumVsOldNum.resample('D').sum()['oldNum']['2016-07-05':'2017-07-02'].rolling(window=50,min_periods=10).corr(dayData['incomeRate']['2016-07-05':'2017-07-02']).plot(title='老投资人数与收益率相关系数',figsize=(12,7))
# Per-platform totals divided by 365.
# NOTE(review): unlike the other per-platform aggregates this is not .mean();
# the division also applies to every summed column, not only newNum/oldNum -- confirm intent.
platNewNumVSOldNum=newNumVsOldNum.groupby('wdzjPlatId').sum()/365
platNewNumVSOldNum['wdzjPlatId']=platNewNumVSOldNum.index.to_series()
# Merge into the detail table and drop the 'id_y' column produced by the merge.
platDataDetail=pd.merge(platDataDetail,platNewNumVSOldNum,left_on='platId',right_on='wdzjPlatId')
platDataDetail
del platDataDetail['id_y']
# Amount invested by new vs. returning investors, indexed by the parsed 'date' column.
newSumVsOldSum = pd.read_csv('platNewSumVSOldSum.csv', parse_dates=True)
newSumVsOldSum.index = pd.to_datetime(newSumVsOldSum['date'])

# Shared settings for the 50-row rolling means used below.
amount_roll = dict(window=50, min_periods=10)

# Platform 59 (labelled 陆金所): raw amounts, their rolling means, and the rolling
# mean of the platform's 'amount' from platVolume.
lujin_sums = newSumVsOldSum[newSumVsOldSum['wdzjPlatId'] == 59]
lujin_sums[['newSum', 'oldSum']].plot(title='陆金所新老用户投资额变化')
lujin_sums['newSum'].rolling(**amount_roll).mean().plot(label='陆金所50日每日新用户投资额均线')
lujin_sums['oldSum'].rolling(**amount_roll).mean().plot(label='陆金所50日每日老用户投资额均线')
lujin_volume = platVolume[platVolume['wdzjPlatId'] == 59]['amount']
lujin_volume.rolling(**amount_roll).mean().plot(label='陆金所50日每日交易额均线', figsize=(12, 7))
plt.legend(loc='best')

# The same three rolling means over all platforms summed per calendar day.
daily_sums = newSumVsOldSum.resample('D').sum()
daily_sums['newSum'].rolling(**amount_roll).mean().plot(label='全平台50日每日新用户投资额均线')
daily_sums['oldSum'].rolling(**amount_roll).mean().plot(label='全平台50日每日老用户投资额均线')
platVolume.resample('D').sum()['amount'].rolling(**amount_roll).mean().plot(label='全平台50日每日交易额均线')
plt.legend(loc='best')

# Rows ordered by the amount invested by new investors.
newSumVsOldSum.sort_values(by='newSum')

# Income-rate series of platform 508 (labelled 金信网) with its rolling mean.
jinxin_rate = incomeRateData[incomeRateData['wdzjPlatId'] == 508]['incomeRate']
jinxin_rate.plot(label='金信网收益率曲线')
jinxin_rate.rolling(**amount_roll).mean().plot(label='金信网50日每日收益率曲线', figsize=(12, 7))
plt.legend(loc='best')
# Mean investment period per platform (platMeanPeriod) next to allMeanPeriod,
# indexed by the parsed 'date' column.
meanPeriod = pd.read_csv('platPlatMeanPeriodVSAllMeanPeriod.csv', parse_dates=True)
meanPeriod.index = pd.to_datetime(meanPeriod['date'])

# Shared settings for the 50-row rolling means used below.
period_roll = dict(window=50, min_periods=10)

# Platform 59 (labelled 陆金所): both period series and their rolling means.
lujin_period = meanPeriod[meanPeriod['wdzjPlatId'] == 59]
industry_period = lujin_period['allMeanPeriod']
lujin_period[['platMeanPeriod', 'allMeanPeriod']].plot(figsize=(12, 7))
lujin_period['platMeanPeriod'].rolling(**period_roll).mean().plot(label='陆金所借款期限50日每日均值')
industry_period.rolling(**period_roll).mean().plot(label='行业所借款期限50日每日均值')
plt.legend(loc='best')

# The allMeanPeriod series alone (read from platform 59's rows) with its rolling mean.
industry_period.plot(figsize=(12, 7), label='行业借款期限')
industry_period.rolling(**period_roll).mean().plot(label='行业所借款期限50日每日均值')
plt.legend(loc='best')

# Discard rows whose period is 0; the original author treats those as days on
# which the platform issued no loans.
meanPeriod = meanPeriod[meanPeriod['platMeanPeriod'] != 0.00]

# Per-platform average period, joined onto the detail table.
platMeanPeriod = meanPeriod.groupby('wdzjPlatId').mean()
platMeanPeriod['wdzjPlatId'] = platMeanPeriod.index.to_series()
platDataDetail = pd.merge(
    platDataDetail,
    platMeanPeriod[['platMeanPeriod', 'wdzjPlatId']],
    left_on='platId',
    right_on='wdzjPlatId',
)
# Remove the join-key columns accumulated by this and earlier merges.
for redundant_key in ('wdzjPlatId', 'wdzjPlatId_y', 'wdzjPlatId_x'):
    del platDataDetail[redundant_key]
platDataDetail
# Platform capital inflow and outstanding balance to repay (the original comment
# also mentions a 30-day average of that balance), indexed by the parsed 'date' column.
InflowVSToPay = pd.read_csv('platInflowVSToPay.csv', parse_dates=True)
InflowVSToPay.index = pd.to_datetime(InflowVSToPay['date'])

# Shared settings for the 50-row rolling statistics used below.
flow_roll = dict(window=50, min_periods=10)

# Platform 59 (labelled 陆金所): extremes and ordering of the daily net inflow.
is_lujin = InflowVSToPay['wdzjPlatId'] == 59
lujin_inflow = InflowVSToPay[is_lujin]['inflow']
lujin_to_pay = InflowVSToPay[is_lujin]['toPay']
print('陆金所单日资金净流入最大为 ', lujin_inflow.max())
print('陆金所单日资金净流入最小为 ', lujin_inflow.min())
lujin_inflow.sort_values()

# Number of rows with positive / negative inflow, counted through the 'id' column.
print('陆金所资金流入天数', InflowVSToPay[is_lujin & (InflowVSToPay['inflow'] > 0)].count()['id'])
print('陆金所资金流出天数', InflowVSToPay[is_lujin & (InflowVSToPay['inflow'] < 0)].count()['id'])

# Net inflow with its rolling mean, then the rolling mean on its own.
lujin_inflow_avg = lujin_inflow.rolling(**flow_roll).mean()
lujin_inflow.plot(figsize=(12, 7), label='陆金所资金净流入')
lujin_inflow_avg.plot(label='陆金所50日每日资金净流入曲线')
plt.legend(loc='best')
lujin_inflow_avg.plot(label='陆金所50日每日资金净流入均线', figsize=(12, 7))
plt.legend(loc='best')

# Outstanding balance with its rolling mean.
lujin_to_pay.plot(label='陆金所待还余额变化趋势', figsize=(12, 7))
lujin_to_pay.rolling(**flow_roll).mean().plot(label='陆金所50日每日待还余额均线')
plt.legend(loc='best')

# Rolling correlation between the outstanding balance and the net inflow.
lujin_to_pay.rolling(**flow_roll).corr(lujin_inflow).plot()

# All platforms summed per calendar day.
daily_flow = InflowVSToPay.resample('D').sum()
daily_flow['inflow'].plot(figsize=(12, 7), title='全平台资金净流入')
daily_flow['toPay'].plot(figsize=(12, 7), title='全平台待待还余额')

# Per-platform averages; histogram plus KDE of each column on a log10 scale.
platInflowVSToPay = InflowVSToPay.groupby('wdzjPlatId').mean()[['inflow', 'toPay']]
for flow_column, kde_title in (('inflow', '按净流入 平台分布'), ('toPay', '按待付 平台分布')):
    log_flow = np.log10(platInflowVSToPay[flow_column])
    log_flow.hist(figsize=(12, 7), normed=True)
    log_flow.plot(kind='kde', style='k--', title=kde_title)

# Join the per-platform averages onto the detail table and drop the join key.
platInflowVSToPay['wdzjPlatId'] = platInflowVSToPay.index.to_series()
platDataDetail = pd.merge(
    platDataDetail,
    platInflowVSToPay,
    left_on='platId',
    right_on='wdzjPlatId',
)
platDataDetail
del platDataDetail['wdzjPlatId']
# Number of loans issued and mean loan period per platform, indexed by the
# parsed 'date' column.
loanNumVSLoanPeriod = pd.read_csv('platLoanNumVSLoanPeriod.csv', parse_dates=True)
loanNumVSLoanPeriod.index = pd.to_datetime(loanNumVSLoanPeriod['date'])

# Per-platform averages of the two metrics, keyed by wdzjPlatId.
loan_metrics = ['loanNum', 'loanPeriod']
platLoanNumVSLoanPeriod = loanNumVSLoanPeriod.groupby('wdzjPlatId').mean()[loan_metrics]
platLoanNumVSLoanPeriod['wdzjPlatId'] = platLoanNumVSLoanPeriod.index.to_series()

# Join onto the detail table, then drop the join key again.
platDataDetail = pd.merge(
    platDataDetail,
    platLoanNumVSLoanPeriod,
    left_on='platId',
    right_on='wdzjPlatId',
)
del platDataDetail['wdzjPlatId']
# Number of borrowers with repayments outstanding (toPayNum) and investors with
# receipts outstanding (toReceiveNum), indexed by the parsed 'date' column.
toPayNumVSToReceiveNum = pd.read_csv('platToPayNumVSToReceiveNum.csv', parse_dates=True)
toPayNumVSToReceiveNum.index = pd.to_datetime(toPayNumVSToReceiveNum['date'])

# Shared settings for the 50-row rolling means used below.
people_roll = dict(window=50, min_periods=10)

# Platform 59 (labelled 陆金所): extremes of both head-counts, then both series.
lujin_people = toPayNumVSToReceiveNum[toPayNumVSToReceiveNum['wdzjPlatId'] == 59]
lujin_payers = lujin_people['toPayNum']
lujin_receivers = lujin_people['toReceiveNum']
print('陆金所最大单日待还借款人数:', lujin_payers.max())
print('陆金所最小单日待还借款人数:', lujin_payers.min())
print('陆金所最大单日代收借款人数:', lujin_receivers.max())
print('陆金所最小单日代收借款人数:', lujin_receivers.min())
lujin_people[['toPayNum', 'toReceiveNum']].plot(figsize=(12, 7), title='陆金所待还借款人数与待收投资人数')

# All platforms summed per calendar day, each series with its rolling mean.
daily_people = toPayNumVSToReceiveNum.resample('D').sum()
daily_payers = daily_people['toPayNum']
daily_receivers = daily_people['toReceiveNum']
daily_payers.plot(label='全平台每日待还人数', figsize=(12, 7))
daily_payers.rolling(**people_roll).mean().plot(label='全平台50日每日待还人数均线')
daily_receivers.plot(label='全平台每日代收人数')
daily_receivers.rolling(**people_roll).mean().plot(label='全平台50日每日代收人数均线')
plt.legend(loc='best')

# Per-platform averages; histogram plus KDE of each column on a log10 scale.
platToPayNumVsToReceiveNum = toPayNumVSToReceiveNum.groupby('wdzjPlatId').mean()[['toPayNum', 'toReceiveNum']]
for people_column, kde_title in (('toPayNum', '平台待付人数分布'), ('toReceiveNum', '平台待收人数分布')):
    log_people = np.log10(platToPayNumVsToReceiveNum[people_column])
    log_people.hist(figsize=(12, 7), normed=True)
    log_people.plot(kind='kde', style='k--', title=kde_title)

# Join the per-platform averages onto the detail table and drop the join key.
platToPayNumVsToReceiveNum['wdzjPlatId'] = platToPayNumVsToReceiveNum.index.to_series()
platDataDetail = pd.merge(
    platDataDetail,
    platToPayNumVsToReceiveNum,
    left_on='platId',
    right_on='wdzjPlatId',
)
del platDataDetail['wdzjPlatId']
# Share of the outstanding amount held by the top-10 borrowers (toPay10) and by the
# top-10 investors (toReceive10) of each platform.
toPay10VSToReceive10=pd.read_csv('platToPay10VSToReceive10.csv',parse_dates=True)
# Index the rows by date, converted to datetime; the 'date' column itself is kept.
toPay10VSToReceive10.index=toPay10VSToReceive10['date']
toPay10VSToReceive10.index=pd.to_datetime(toPay10VSToReceive10.index)
# Platform 59 (labelled 陆金所): one subplot per ratio.
fig,axes=plt.subplots(nrows=2,ncols=1,figsize=(12,7))
toPay10VSToReceive10[toPay10VSToReceive10['wdzjPlatId']==59]['toPay10'].plot(ax=axes[0],label='陆金所前十借款人待还所占比例')
toPay10VSToReceive10[toPay10VSToReceive10['wdzjPlatId']==59]['toReceive10'].plot(ax=axes[1],label='陆金所前十投资人代收所占比例')
# plt.legend() only draws on the current axes, so the label of the other subplot
# was not guaranteed a legend; request one on each axes explicitly.
axes[0].legend(loc='best')
axes[1].legend(loc='best')
# Sanity check: list rows whose ratio is negative.
toPay10VSToReceive10[toPay10VSToReceive10['toPay10']<0]
toPay10VSToReceive10[toPay10VSToReceive10['toReceive10']<0]
# Use the rows dated 2017-05-31 as each platform's reference values.
platToPay10VSToReceive10=toPay10VSToReceive10[toPay10VSToReceive10['date']=='2017-05-31']
# Keep only the two ratios and the join key before merging.
platToPay10VSToReceive10=platToPay10VSToReceive10[['toPay10','toReceive10','wdzjPlatId']]
platDataDetail=pd.merge(platDataDetail,platToPay10VSToReceive10,left_on='platId',right_on='wdzjPlatId')
platDataDetail
# The join key duplicates platId after the merge.
del platDataDetail['wdzjPlatId']
# Share of the outstanding amount held by the top-50 borrowers (toPay50) and by the
# top-50 investors (toReceive50) of each platform.
toPay50VSToReceive50=pd.read_csv('platToPay50VSToReceive50.csv',parse_dates=True)
toPay50VSToReceive50
# Use the rows dated 2017-05-31 as each platform's reference values.
platToPay50VSToReceive50=toPay50VSToReceive50[toPay50VSToReceive50['date']=='2017-05-31']
# Keep only the two ratios and the join key. The original stored this subset under
# the misspelled name platToPay10VSToReceive50, so the merge below pulled in every
# column of the unreduced frame instead of just the two ratios.
platToPay50VSToReceive50=platToPay50VSToReceive50[['toPay50','toReceive50','wdzjPlatId']]
# Keep the misspelled name bound to the same subset in case later code refers to it.
platToPay10VSToReceive50=platToPay50VSToReceive50
platDataDetail=pd.merge(platDataDetail,platToPay50VSToReceive50,left_on='platId',right_on='wdzjPlatId')
# The join key duplicates platId after the merge.
del platDataDetail['wdzjPlatId']
platDataDetail
# Forecast the income rate of platform 59 with Prophet.
# .copy() makes the filtered rows an independent frame, so adding the 'y'/'ds'
# columns below does not raise pandas' SettingWithCopyWarning.
lujinIncome=incomeRateData[incomeRateData['wdzjPlatId']==59].copy()
# Prophet reads the target from 'y' and the timestamps from 'ds'; the target here
# is the natural log of the income rate, so the forecasts are in log space too.
lujinIncome['y']=np.log(lujinIncome['incomeRate'])
lujinIncome['ds']=lujinIncome['date']
# First model: weekly seasonality switched on explicitly, forecast 365 periods ahead.
m=Prophet(weekly_seasonality=True)
m.fit(lujinIncome)
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
# Second model: same data, yearly seasonality switched on explicitly instead.
m=Prophet(yearly_seasonality=True)
m.fit(lujinIncome)
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
# Forecast the 'amount' series of platform 59 from platVolume with Prophet.
# .copy() makes the filtered rows an independent frame, so adding the 'y'/'ds'
# columns below does not raise pandas' SettingWithCopyWarning.
lujinAmount=platVolume[platVolume['wdzjPlatId']==59].copy()
# Prophet reads the target from 'y' and the timestamps from 'ds'.
lujinAmount['y']=lujinAmount['amount']
lujinAmount['ds']=lujinAmount['date']
# Yearly seasonality switched on explicitly; forecast 365 periods ahead.
m=Prophet(yearly_seasonality=True)
m.fit(lujinAmount)
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
# Forecast platform 59's toPayNum and toReceiveNum head-counts with Prophet.
# .copy() makes the filtered rows an independent frame, so adding the 'ds'/'y'
# columns below does not raise pandas' SettingWithCopyWarning.
lujinToPayNum=toPayNumVSToReceiveNum[toPayNumVSToReceiveNum['wdzjPlatId']==59].copy()
# Prophet reads the timestamps from 'ds' and the target from 'y'.
lujinToPayNum['ds']=lujinToPayNum['date']
lujinToPayNum['y']=lujinToPayNum['toPayNum']
# Model for toPayNum with default seasonality settings, 365 periods ahead.
m=Prophet()
m.fit(lujinToPayNum)
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
# Take a real copy: the original bound a second name to the same frame, so
# replacing 'y' here also overwrote lujinToPayNum['y'] with the toReceiveNum values.
lujinToReceiveNum=lujinToPayNum.copy()
lujinToReceiveNum['y']=lujinToReceiveNum['toReceiveNum']
# Model for toReceiveNum with default seasonality settings.
m=Prophet()
m.fit(lujinToReceiveNum)
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
# Same target again with yearly seasonality switched on explicitly.
m=Prophet(yearly_seasonality=True)
m.fit(lujinToReceiveNum)
future=m.make_future_dataframe(periods=365)
forecast=m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
platDataDetail