#!coding=utf8
import numpy as np
from pandas import *
import subprocess
import json
from datetime import datetime, timedelta
import sys, os

# read the time variation factors
csvs={'m':'mon.csv','w':'week.csv','d':'day.csv'}
for t in 'mwd':
  df='df_A'+t
  try:
    exec(df+'=read_csv("'+df+'")')
  except:  
    sys.exit('df_A? not found, please re-run prep_dfAdmw.py ') 

#union of nsc2 in time variation files
s_nsc2=set(df_Ad.nsc2) | set(df_Am.nsc2) | set(df_Aw.nsc2)

#open the TEDS area csv file
pwd=subprocess.check_output('pwd',shell=True).decode('utf8').strip('\n')
teds=pwd.split('/')[3][4:6]
fname='areagrid'+teds+'LL.csv'
df = read_csv(fname)
df.drop_duplicates(inplace=True)
df=df.reset_index(drop=True)
if 'nsc2' not in df.columns:
  df.loc[df['NSC_SUB'].map(lambda x: (type(x)==float and np.isnan(x)==True) or ( x==' ')),'NSC_SUB']='b'
  df['nsc2']=[str(x)+y for x,y in zip(df['NSC'],df['NSC_SUB'])]
df.loc[df['DICT'].map(lambda x:np.isnan(x)==True),'DICT']=5300.
if 'CNTY' not in df.columns:
  df['CNTY']=[str(int(s/100)) for s in list(df['DICT'])]

#definition the coordinates of database
minx,miny=min(df.UTME),min(df.UTMN)
df.UTME=round(df.UTME-minx,-3)
df.UTMN=round(df.UTMN-miny,-3)
df['YX']=np.array(df.UTMN+df.UTME/1000,dtype=int)

#sum-up the grids which length maybe smaller than 1KM
cole=['EM_SOX','EM_NOX','EM_CO','EM_PM25','EM_PM','EM_NMHC']
coli=['CNTY', 'nsc2','YX']
df=pivot_table(df,index=coli,values=cole,aggfunc=np.sum).reset_index()
#nXXX:len of list XXX; dXXX:index dictionary of listXXX, iXXX:index of XXX
for c in ['CNTY','nsc2','YX']:
  exec(c+'=list(set(df.'+c+'))')
  exec(c+'.sort()')
  exec('n'+c+'=len('+c+')')
  exec('d'+c+'={'+c+'[i]:i for i in range(n'+c+')}')
  exec('df["i'+c+'"]=[d'+c+'[i] for i in df.'+c+']')

#same NSC in df_A? and df, but without NSC_SUB, all add tp s_nsc2
nsc2b=set([i for i in s_nsc2 if i[-1]=='b'])
for ii in nsc2b-set(df.nsc2):
  i=int(ii[:-1])
  s_nsc2=s_nsc2|set(df.loc[df.nsc2.map(lambda x:x[:-1]==i),'nsc2'])
#Tuple_of_Length ={0, 101, 10000, 10101, 131313, 171717, 202020, 212121, 250101}
n_rgn,s_rgn,d_rgn={},[],{}
for n in s_nsc2:
  tl_rgn=(len(df_Am[df_Am.nsc2==n])*100+len(df_Aw[df_Aw.nsc2==n]))*100+len(df_Ad[df_Ad.nsc2==n])
  s_rgn.append(tl_rgn)
  n_rgn[n]=tl_rgn
  d_rgn[tl_rgn]=n #recording last nsc2 for calling
s_rgn=set(s_rgn) #tuple of region numbers

yr=2016+(int(teds)-10)*3
bdate=datetime(yr,1,1)-timedelta(days=1)
nd365=365+2
if yr%4==0:nd365=366+2
nty=nd365*24
dts=[bdate+timedelta(days=i/24.) for i in range(nty)]

#n_cnty: the dictionary of nsc2 vs cnties applied
if os.path.exists('n_cnty.json'):
  with open('n_cnty.json', 'r', newline='') as jsonfile:
    n_cnty=json.load(jsonfile)
else:
  for kc in ['cnty','kpq']:
    jsname='d_'+kc+'.json'
    if os.path.exists(jsname):
      with open(jsname, 'r', newline='') as jsonfile:
        exec('d_'+kc+'=json.load(jsonfile)')
    else:
      sys.exit('d_'+kc+'.json not found, please re-run prep_dfAdmw.py ') 
  n_cntys={}
  for tl in s_rgn: #tuple of lengs
    n=d_rgn[tl]
    cntys=[]
    for t in 'mwd':
      exec('regs=list(df_A'+t+'.loc[df_A'+t+'.nsc2==n,"REGION"])')
      if len(regs)==0:continue
      for r in regs:
        if r in d_kpq:
          cntys+=[d_cnty[i] for i in d_kpq[r]]
        else:
          cntys+=[d_cnty[r]]
    cntys=list(set(cntys))
    for n in [i for i in n_rgn if n_rgn[i]==tl]:
      n_cntys[n]=cntys
  with open('n_cnty.json', 'w', newline='') as jsonfile:
    json.dump(n_cntys, jsonfile)


if not os.path.exists('nc_fac.json'):
  mns=np.array([dts[i].month-1 for i in range(nty)])
  wks=np.array([dts[i].weekday() for i in range(nty)])
  hrs=np.array([dts[i].hour for i in range(nty)])

  nts={'m':12,'w':7,'d':24}
  for t in 'mwd':
    exec('df_A=df_A'+t)
    df=DataFrame({})
    df_A['CNTY']=0
    for i in range(len(df_A)):
      cntys=[]
      a=df_A.loc[i]
      r=df_A.loc[i,'REGION']
      if r in d_kpq:
        cntys+=[d_cnty[i] for i in d_kpq[r]]
      else:
        cntys+=[d_cnty[r]]
      for c in cntys:
        a.CNTY=c
        df=df.append(a,ignore_index=True)
    f_A={}
    for j in range(len(df)):
      n=df.loc[j,'nsc2']
      c=df.loc[j,'CNTY']
      f_A[(n,c)]=[df.loc[j,str(i)] for i in range(1,nts[t]+1)]
    exec('f_A'+t+'=f_A')
  for i in set(f_Am)-set(f_Aw):
    f_Aw[i]=np.ones(shape=7)  
  for i in set(f_Am)-set(f_Ad):
    f_Ad[i]=np.ones(shape=24)

  nc_fac={}
  for n in s_nsc2:
    for c in n_cntys[n]:
      tup=(n,c)
      tp=n+'_'+c
      if tup not in f_Am:continue
      lfac=np.array([f_Am[tup][m]*f_Aw[tup][w]*f_Ad[tup][d] for m,w,d in zip(mns,wks,hrs)])
      sfac=sum(lfac)
      nc_fac[tp]=list(lfac/sfac)

  with open('nc_fac.json', 'w', newline='') as jsonfile:
    json.dump(nc_fac, jsonfile)