-- Create table with enhanced pitching stats, aggregated
-- over multiple stints within the same year, league and team
-- Derived stats: BAOpp, ERA, ERA_LOG, WHIP, KtoBB, FIP
DROP TABLE IF EXISTS public.pitching_enh;

-- Pitching totals and rate stats, one row per (lgID, teamID, playerID, yearID).
-- The derived table "s" sums the counting stats over all source rows of a
-- group; the outer query turns those sums into rate stats.
-- Every rate stat guards its division: when the denominator is 0 the value is
-- 'Infinity' if the numerator is positive ('-Infinity' if negative, tested
-- for FIP only) and 'NaN' otherwise.
CREATE TABLE public.pitching_enh DISTRIBUTE BY HASH(playerID) AS
SELECT
    s.lgID,
    s.teamID,
    s.playerID,
    s.yearID,
    -- yearID with its last digit dropped to 0, e.g. 1987 -> 1980
    s.yearID - s.yearID % 10 AS decadeID,
    s.W,
    s.L,
    s.G,
    s.GS,
    s.CG,
    s.SHO,
    s.SV,
    s.IPouts,
    s.H,
    s.ER,
    s.HR,
    s.BB,
    s.SO,
    -- Opponents' batting average: H / (BFP - BB - HBP)
    CASE
        WHEN s.H > 0 AND s.BFP - s.BB - s.HBP = 0 THEN 'Infinity'
        WHEN s.BFP - s.BB - s.HBP = 0 THEN 'NaN'
        ELSE CAST(s.H AS double)/(s.BFP - s.BB - s.HBP)
    END AS BAOpp,
    -- Earned run average: 9 * ER / (IPouts / 3)
    CASE
        WHEN s.ER > 0 AND s.IPouts = 0 THEN 'Infinity'
        WHEN s.IPouts = 0 THEN 'NaN'
        ELSE CAST(9.*s.ER/(s.IPouts/3.) AS double)
    END AS ERA,
    -- LOG of ERA; an ERA of exactly 0 is reported as 'NaN'
    CASE
        WHEN s.ER > 0 AND s.IPouts = 0 THEN 'Infinity'
        WHEN s.IPouts = 0 THEN 'NaN'
        WHEN CAST(9.*s.ER/(s.IPouts/3.) AS double) = 0 THEN 'NaN'
        ELSE LOG(CAST(9.*s.ER/(s.IPouts/3.) AS double))
    END AS ERA_LOG,
    s.IBB,
    s.WP,
    s.HBP,
    s.BK,
    s.BFP,
    s.GF,
    s.R,
    s.SH,
    s.SF,
    s.GIDP,
    -- WHIP: (BB + H) / (IPouts / 3)
    CASE
        WHEN s.BB + s.H > 0 AND s.IPouts = 0 THEN 'Infinity'
        WHEN s.IPouts = 0 THEN 'NaN'
        ELSE CAST((s.BB+s.H)/(s.IPouts/3.) AS double)
    END AS WHIP,
    -- Strikeout-to-walk ratio: SO / BB
    CASE
        WHEN s.SO > 0 AND s.BB = 0 THEN 'Infinity'
        WHEN s.BB = 0 THEN 'NaN'
        ELSE CAST(s.SO AS double)/s.BB
    END AS KtoBB,
    -- FIP as computed here: (13*HR + 3*(BB + HBP - IBB) - 2*SO) / (IPouts / 3);
    -- no additive constant is applied
    CASE
        WHEN s.HR*13.+(s.BB+s.HBP-s.IBB)*3.-s.SO*2. > 0 AND s.IPouts = 0 THEN 'Infinity'
        WHEN s.HR*13.+(s.BB+s.HBP-s.IBB)*3.-s.SO*2. < 0 AND s.IPouts = 0 THEN '-Infinity'
        WHEN s.IPouts = 0 THEN 'NaN'
        ELSE CAST((s.HR*13.+(s.BB+s.HBP-s.IBB)*3.-s.SO*2.)/(s.IPouts/3.) AS double)
    END AS FIP
FROM (
    SELECT
        lgID,
        teamID,
        playerID,
        yearID,
        SUM(W)      AS W,
        SUM(L)      AS L,
        SUM(G)      AS G,
        SUM(GS)     AS GS,
        SUM(CG)     AS CG,
        SUM(SHO)    AS SHO,
        SUM(SV)     AS SV,
        SUM(IPouts) AS IPouts,
        SUM(H)      AS H,
        SUM(ER)     AS ER,
        SUM(HR)     AS HR,
        SUM(BB)     AS BB,
        SUM(SO)     AS SO,
        SUM(IBB)    AS IBB,
        SUM(WP)     AS WP,
        SUM(HBP)    AS HBP,
        SUM(BK)     AS BK,
        SUM(BFP)    AS BFP,
        SUM(GF)     AS GF,
        SUM(R)      AS R,
        -- SH, SF and GIDP are cast to integer before they are summed
        SUM(CAST(SH AS integer))   AS SH,
        SUM(CAST(SF AS integer))   AS SF,
        SUM(CAST(GIDP AS integer)) AS GIDP
    FROM public.pitching
    GROUP BY lgID, teamID, playerID, yearID
) AS s;

-- Remove rows with an undefined or infinite rate stat, i.e. rows where the
-- zero-denominator guard of BAOpp, ERA, FIP, KtoBB or WHIP produced
-- 'Infinity', '-Infinity' or 'NaN'.
-- The table is schema-qualified, like the CREATE TABLE that builds it, so the
-- statement does not depend on the session's schema search path.
-- ERA_LOG is not tested: an ERA of exactly 0 gives ERA_LOG = 'NaN' but does
-- not by itself cause the row to be removed.
DELETE FROM public.pitching_enh
 WHERE BAOpp IN ('Infinity', 'NaN', '-Infinity')
    OR ERA   IN ('Infinity', 'NaN', '-Infinity')
    OR FIP   IN ('Infinity', 'NaN', '-Infinity')
    OR KtoBB IN ('Infinity', 'NaN', '-Infinity')
    OR WHIP  IN ('Infinity', 'NaN', '-Infinity');


-- Create table with enhanced batting stats, aggregated
-- over multiple stints within the same year, league and team
-- New stats: TB, TOB, XBH, BA, SLG, TA, OBP, OPS
DROP TABLE IF EXISTS public.batting_enh;

-- Batting totals and rate stats, one row per (lgID, teamID, playerID, yearID).
-- The derived table "s" sums the counting stats over all source rows of a
-- group; the outer query builds the composite counts and rate stats from them.
-- Every rate stat guards its division: when the denominator is 0 the value is
-- 'Infinity' if the numerator is positive and 'NaN' otherwise.
CREATE TABLE public.batting_enh DISTRIBUTE BY HASH(playerID) AS
SELECT
    s.lgID,
    s.teamID,
    s.playerID,
    s.yearID,
    -- yearID with its last digit dropped to 0, e.g. 1987 -> 1980
    s.yearID - s.yearID % 10 AS decadeID,
    s.g,
    s.ab,
    s.r,
    s.h,
    s.x2b,
    s.x3b,
    s.hr,
    s.rbi,
    s.sb,
    s.cs,
    s.bb,
    s.so,
    s.ibb,
    s.hbp,
    s.sh,
    s.sf,
    s.gidp,
    -- Total bases: H + X2B + 2*X3B + 3*HR
    s.h + s.x2b + 2 * s.x3b + 3 * s.hr AS TB,
    -- Times on base: H + BB + HBP
    s.h + s.bb + s.hbp AS TOB,
    -- Extra-base hits: X2B + X3B + HR
    s.x2b + s.x3b + s.hr AS XBH,
    -- Batting average: H / AB
    CASE
        WHEN s.h > 0 AND s.ab = 0 THEN 'Infinity'
        WHEN s.ab = 0 THEN 'NaN'
        ELSE CAST(s.h AS double)/s.ab
    END AS BA,
    -- Slugging: total bases / AB
    CASE
        WHEN s.h + s.x2b + 2 * s.x3b + 3 * s.hr > 0 AND s.ab = 0 THEN 'Infinity'
        WHEN s.ab = 0 THEN 'NaN'
        ELSE CAST(s.h + s.x2b + 2 * s.x3b + 3 * s.hr AS double)/s.ab
    END AS SLG,
    -- TA: (total bases + BB + HBP + SB - CS) / (AB - H + CS + GIDP)
    CASE
        WHEN s.h + s.x2b + 2 * s.x3b + 3 * s.hr + s.bb + s.hbp + s.sb - s.cs > 0
             AND s.ab - s.h + s.cs + s.gidp = 0 THEN 'Infinity'
        WHEN s.ab - s.h + s.cs + s.gidp = 0 THEN 'NaN'
        ELSE CAST(s.h + s.x2b + 2 * s.x3b + 3 * s.hr + s.bb + s.hbp + s.sb - s.cs AS double)
             /(s.ab - s.h + s.cs + s.gidp)
    END AS TA,
    -- On-base percentage: (H + BB + HBP) / (AB + BB + HBP + SF)
    CASE
        WHEN s.h + s.bb + s.hbp > 0 AND s.ab + s.bb + s.hbp + s.sf = 0 THEN 'Infinity'
        WHEN s.ab + s.bb + s.hbp + s.sf = 0 THEN 'NaN'
        ELSE CAST(s.h + s.bb + s.hbp AS double)/(s.ab + s.bb + s.hbp + s.sf)
    END AS OBP,
    -- OPS written as a single fraction over AB * (AB + BB + SF + HBP),
    -- which is algebraically OBP + SLG
    CASE
        WHEN s.ab * (s.h + s.bb + s.hbp)
             + (s.h + s.x2b + 2 * s.x3b + 3 * s.hr) * (s.ab + s.bb + s.sf + s.hbp) > 0
             AND s.ab * (s.ab + s.bb + s.sf + s.hbp) = 0 THEN 'Infinity'
        WHEN s.ab * (s.ab + s.bb + s.sf + s.hbp) = 0 THEN 'NaN'
        ELSE CAST(s.ab * (s.h + s.bb + s.hbp)
                  + (s.h + s.x2b + 2 * s.x3b + 3 * s.hr) * (s.ab + s.bb + s.sf + s.hbp) AS double)
             /(s.ab * (s.ab + s.bb + s.sf + s.hbp))
    END AS OPS
FROM (
    SELECT
        lgID,
        teamID,
        playerID,
        yearID,
        SUM(G)    AS g,
        SUM(AB)   AS ab,
        SUM(R)    AS r,
        SUM(H)    AS h,
        SUM(X2B)  AS x2b,
        SUM(X3B)  AS x3b,
        SUM(HR)   AS hr,
        SUM(RBI)  AS rbi,
        SUM(SB)   AS sb,
        SUM(CS)   AS cs,
        SUM(BB)   AS bb,
        SUM(SO)   AS so,
        SUM(IBB)  AS ibb,
        SUM(HBP)  AS hbp,
        SUM(SH)   AS sh,
        SUM(SF)   AS sf,
        SUM(GIDP) AS gidp
    FROM public.batting
    GROUP BY lgID, teamID, playerID, yearID
) AS s;
       
-- Remove rows whose BA, SLG, TA or OBP is undefined ('NaN'), infinite or
-- negative.
-- The table is schema-qualified, like the CREATE TABLE that builds it, so the
-- statement does not depend on the session's schema search path.
-- OPS needs no test of its own: it equals OBP + SLG, and its denominator
-- AB * (AB + BB + SF + HBP) is zero only when the BA or the OBP denominator
-- is zero, so any row with a non-finite or negative OPS is already matched
-- by the BA, SLG or OBP conditions below.
DELETE FROM public.batting_enh
 WHERE BA  IN ('Infinity', 'NaN', '-Infinity')
    OR BA  < 0
    OR SLG IN ('Infinity', 'NaN', '-Infinity')
    OR SLG < 0
    OR TA  IN ('Infinity', 'NaN', '-Infinity')
    OR TA  < 0
    OR OBP IN ('Infinity', 'NaN', '-Infinity')
    OR OBP < 0;

-- Enhance team stats: every column of public.teams plus the franchise name
-- and active flag, the decade, and team-level rate stats.
--   BA    = H / AB
--   SLG   = (H + X2B + 2*X3B + 3*HR) / AB
--   OBP   = (H + BB + HBP) / (AB + BB + HBP + SF)
--   WHIP  = (BBA + HA) / (IPouts / 3)
--   KtoBB = SO / BB
-- WHIP is a pitching stat, so it is built from hits and walks ALLOWED (HA,
-- BBA) rather than from the team's own batting H and BB, which would divide
-- an offensive total by the pitching staff's innings.
-- NOTE(review): HA and BBA are the Lahman Teams column names for hits/walks
-- allowed -- confirm they exist under these names in public.teams as loaded.
-- NOTE(review): KtoBB uses the batting SO and BB columns; if the pitching
-- ratio was intended it should use SOA / BBA (and guard BBA <> 0) -- confirm.
DROP TABLE IF EXISTS public.teams_enh;
CREATE TABLE public.teams_enh DISTRIBUTE BY HASH(teamID) AS
SELECT
    f.franchname,
    f.active,
    t.*,
    -- yearID with its last digit dropped to 0, e.g. 1987 -> 1980
    t.yearID - t.yearID % 10 AS decadeID,
    CAST(t.H AS double)/t.AB AS BA,
    CAST(t.H + t.X2B + 2 * t.X3B + 3 * t.HR AS double)/t.AB AS SLG,
    CAST(t.H + t.BB + t.HBP AS double)/(t.AB + t.BB + t.HBP + t.SF) AS OBP,
    CAST((t.BBA + t.HA)/(t.IPouts/3.) AS double) AS WHIP,
    CAST(t.SO AS double)/t.BB AS KtoBB
FROM public.teams AS t
INNER JOIN public.teamsfranchises AS f
    ON t.franchid = f.franchid
-- Rows that would divide by zero in BA / SLG (AB), WHIP (IPouts) or
-- KtoBB (BB) are left out of the table altogether.
WHERE t.AB <> 0
  AND t.IPouts <> 0
  AND t.BB <> 0;

-- Enhance master table: every column of public.master plus birthdate and
-- deathdate, assembled from the separate year / month / day columns.
-- Each part is rendered zero-padded to a fixed width (4, 2, 2 digits). The
-- 'FM' prefix suppresses the leading blank that to_char() otherwise reserves
-- for the sign of a number, so the parts concatenate to exactly eight digits
-- and line up with the 'YYYYMMDD' template instead of relying on to_date()
-- skipping embedded blanks.
-- NOTE(review): presumably a NULL year, month or day makes the concatenation,
-- and therefore the resulting date, NULL -- confirm for this engine.
DROP TABLE IF EXISTS public.master_enh;
CREATE TABLE public.master_enh DISTRIBUTE BY HASH(playerID) AS
SELECT *,
       to_date(to_char(birthyear, 'FM0000') || to_char(birthmonth, 'FM00') || to_char(birthday, 'FM00'), 'YYYYMMDD') AS birthdate,
       to_date(to_char(deathyear, 'FM0000') || to_char(deathmonth, 'FM00') || to_char(deathday, 'FM00'), 'YYYYMMDD') AS deathdate
  FROM public.master;