import Mathlib.Data.Finsupp.Defs
import Mathlib.Data.Fin.VecNotation
import Mathlib.Data.FunLike.Basic
import Mathlib.Data.Vector.Basic
import Mathlib.Data.Multiset.Dedup
import Mathlib.Data.Multiset.Filter
import Mathlib.Data.Prod.Lex

import Provenance.Database

variable {T: Type} [ValueType T]

inductive Term T n where
| const : T → Term T n
| index : Fin n → Term T n
| add : Term T n → Term T n → Term T n
| sub : Term T n → Term T n → Term T n
| mul : Term T n → Term T n → Term T n

def Term.repr [Repr T] : Term T n → ℕ → Std.Format
| const a, _ => reprArg a
| index k, _ => "#" ++ (reprArg k)
| add t₁ t₂, p => Repr.addAppParen (repr t₁ p ++ "+" ++ repr t₂ p) p
| sub t₁ t₂, p => Repr.addAppParen (repr t₁ p ++ "-" ++ repr t₂ p) p
| mul t₁ t₂, p => Repr.addAppParen (repr t₁ p ++ "*" ++ repr t₂ p) p

instance [Repr α] : Repr (Term α n) := ⟨Term.repr⟩

def Term.castToAnnotatedTuple (t: Term T n) : Term (T⊕K) (n+1) := match t with
| const c => const (Sum.inl c)
| index k => index (k.castLT (k.val_lt_of_le (Nat.le_add_right n 1)))
| add t₁ t₂ => add t₁.castToAnnotatedTuple t₂.castToAnnotatedTuple
| sub t₁ t₂ => sub t₁.castToAnnotatedTuple t₂.castToAnnotatedTuple
| mul t₁ t₂ => mul t₁.castToAnnotatedTuple t₂.castToAnnotatedTuple


def Term.eval (term: Term T n) (tuple: Tuple T n) := match term with
  | const a => a
  | index k => tuple k
  | add t₁ t₂ => (t₁.eval tuple) + (t₂.eval tuple)
  | sub t₁ t₂ => (t₁.eval tuple) - (t₂.eval tuple)
  | mul t₁ t₂ => (t₁.eval tuple) * (t₂.eval tuple)

theorem Term.castToAnnotatedTuple_eval [HasAltLinearOrder K] [SemiringWithMonus K] (t: Term T n) (tuple: Tuple T n) :
∀ α: K,
  t.castToAnnotatedTuple.eval (Fin.append (λ k ↦ Sum.inl (tuple k)) ![Sum.inr α]) = Sum.inl (t.eval tuple) := by
  intro α
  induction t with
  | const c =>
    unfold castToAnnotatedTuple eval
    simp
  | index k =>
    unfold castToAnnotatedTuple eval
    have hk : k.castLT (lt_trans k.isLt (lt_add_one n)) = Fin.castAdd 1 k := rfl
    rw[hk]
    rw[Fin.append_left]
  | add t₁ t₂ ih₁ ih₂ =>
    unfold castToAnnotatedTuple eval
    rw[ih₁, ih₂]
    simp[(·+·),Add.add]
  | sub t₁ t₂ ih₁ ih₂ =>
    unfold castToAnnotatedTuple eval
    rw[ih₁, ih₂]
    simp[(·-·),Sub.sub]
  | mul t₁ t₂ ih₁ ih₂ =>
    unfold castToAnnotatedTuple eval
    rw[ih₁, ih₂]
    simp[(·*·),Mul.mul]

instance : Coe T (Term T n) where
  coe a:= Term.const a

instance : OfNat (Term ℕ n) (a: ℕ) where
  ofNat := Term.const a

prefix:max "#" => Term.index

inductive BoolTerm (T) (n: ℕ) where
| EQ : Term T n → Term T n → BoolTerm T n
| NE : Term T n → Term T n → BoolTerm T n
| LE : Term T n → Term T n → BoolTerm T n
| LT : Term T n → Term T n → BoolTerm T n
| GE : Term T n → Term T n → BoolTerm T n
| GT : Term T n → Term T n → BoolTerm T n

def BoolTerm.repr [Repr T] : BoolTerm T n → ℕ → Std.Format
| EQ t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ "==" ++ t₂.repr p) p
| NE t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ "!=" ++ t₂.repr p) p
| LE t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ "<=" ++ t₂.repr p) p
| LT t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ "<" ++ t₂.repr p) p
| GE t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ ">=" ++ t₂.repr p) p
| GT t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ ">" ++ t₂.repr p) p

instance [Repr α] : Repr (BoolTerm α n) := ⟨BoolTerm.repr⟩

def BoolTerm.castToAnnotatedTuple (bt: BoolTerm T n): BoolTerm (T⊕K) (n+1) :=
  match bt with
  | EQ a b => EQ a.castToAnnotatedTuple b.castToAnnotatedTuple
  | NE a b => NE a.castToAnnotatedTuple b.castToAnnotatedTuple
  | LE a b => LE a.castToAnnotatedTuple b.castToAnnotatedTuple
  | LT a b => LT a.castToAnnotatedTuple b.castToAnnotatedTuple
  | GE a b => GE a.castToAnnotatedTuple b.castToAnnotatedTuple
  | GT a b => GT a.castToAnnotatedTuple b.castToAnnotatedTuple

infix:20 " == " => λ x y ↦ BoolTerm.EQ x y
infix:20 " != " => λ x y ↦ BoolTerm.NE x y
infix:20 " <= " => λ x y ↦ BoolTerm.LE x y
infix:20 " < " => λ x y ↦ BoolTerm.LT x y
infix:20 " >= " => λ x y ↦ BoolTerm.GE x y
infix:20 " > " => λ x y ↦ BoolTerm.GT x y

def BoolTerm.eval (φ: BoolTerm T n) (tuple: Tuple T n) := match φ with
| EQ t₁ t₂ => (t₁.eval tuple) = (t₂.eval tuple)
| NE t₁ t₂ => (t₁.eval tuple) ≠ (t₂.eval tuple)
| LE t₁ t₂ => (t₁.eval tuple) ≤ (t₂.eval tuple)
| LT t₁ t₂ => (t₁.eval tuple) < (t₂.eval tuple)
| GE t₁ t₂ => (t₁.eval tuple) ≥ (t₂.eval tuple)
| GT t₁ t₂ => (t₁.eval tuple) > (t₂.eval tuple)

theorem BoolTerm.castToAnnotatedTuple_eval [HasAltLinearOrder K] [SemiringWithMonus K] (t: BoolTerm T n) (tuple: Tuple T n) :
  ∀ α: K, t.castToAnnotatedTuple.eval (Fin.append (λ k ↦ Sum.inl (tuple k)) ![Sum.inr α]) = t.eval tuple := by
    intro α
    induction t with
    | EQ t₁ t₂ =>
      unfold BoolTerm.eval BoolTerm.castToAnnotatedTuple
      simp
      repeat rw[Term.castToAnnotatedTuple_eval]
      simp
    | NE t₁ t₂ =>
      unfold BoolTerm.eval BoolTerm.castToAnnotatedTuple
      simp
      repeat rw[Term.castToAnnotatedTuple_eval]
      simp
    | LE t₁ t₂ =>
      unfold BoolTerm.eval BoolTerm.castToAnnotatedTuple
      simp
      repeat rw[Term.castToAnnotatedTuple_eval]
      exact ge_iff_le
    | LT t₁ t₂ =>
      unfold BoolTerm.eval BoolTerm.castToAnnotatedTuple
      simp
      repeat rw[Term.castToAnnotatedTuple_eval]
      simp[LT.lt]
      exact le_of_lt
    | GE t₁ t₂ =>
      unfold BoolTerm.eval BoolTerm.castToAnnotatedTuple
      simp
      repeat rw[Term.castToAnnotatedTuple_eval]
      exact ge_iff_le
    | GT t₁ t₂ =>
      unfold BoolTerm.eval BoolTerm.castToAnnotatedTuple
      simp
      repeat rw[Term.castToAnnotatedTuple_eval]
      simp[LT.lt]
      exact le_of_lt

def BoolTerm.evalDecidable (φ: BoolTerm T n) : DecidablePred φ.eval :=
  λ t => by
    cases φ <;> rename_i x y <;> simp [BoolTerm.eval]
    . exact inferInstanceAs (Decidable (x.eval t = y.eval t))
    . exact inferInstanceAs (Decidable (x.eval t ≠ y.eval t))
    . exact inferInstanceAs (Decidable (x.eval t ≤ y.eval t))
    . exact inferInstanceAs (Decidable (x.eval t < y.eval t))
    . exact inferInstanceAs (Decidable (y.eval t ≤ x.eval t))
    . exact inferInstanceAs (Decidable (y.eval t < x.eval t))

inductive Filter (T) (n: ℕ) where
| BT   : BoolTerm T n   → Filter T n
| Not  : Filter T n → Filter T n
| And  : Filter T n → Filter T n → Filter T n
| Or   : Filter T n → Filter T n → Filter T n
| True : Filter T n

def Filter.repr [Repr T] : Filter T n → ℕ → Std.Format
| BT t, p => t.repr p
| Not f, p => "¬" ++ (Repr.addAppParen (f.repr p) p)
| And t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ "∧" ++ t₂.repr p) p
| Or t₁ t₂, p => Repr.addAppParen (t₁.repr p ++ "∨" ++ t₂.repr p) p
| True, _ => "True"

instance [Repr α] : Repr (Filter α n) := ⟨Filter.repr⟩

def Filter.castToAnnotatedTuple (f: Filter T n): Filter (T⊕K) (n+1) := match f with
| BT  t     => BT t.castToAnnotatedTuple
| Not φ     => Not φ.castToAnnotatedTuple
| And φ₁ φ₂ => And φ₁.castToAnnotatedTuple φ₂.castToAnnotatedTuple
| Or  φ₁ φ₂ => Or φ₁.castToAnnotatedTuple φ₂.castToAnnotatedTuple
| True      => True

def Filter.eval (φ: Filter T n) (tuple: Tuple T n) := match φ with
| BT  φ     => φ.eval tuple
| Not φ     => ¬ (φ.eval tuple)
| And φ₁ φ₂ => (φ₁.eval tuple) ∧ (φ₂.eval tuple)
| Or  φ₁ φ₂ => (φ₁.eval tuple) ∨ (φ₂.eval tuple)
| True      => true

theorem Filter.castToAnnotatedTuple_eval [HasAltLinearOrder K] [SemiringWithMonus K] (φ: Filter T n) (tuple: Tuple T n) :
∀ α: K,
  φ.castToAnnotatedTuple.eval (Fin.append (λ k ↦ Sum.inl (tuple k)) ![Sum.inr α]) = φ.eval tuple := by
    intro α
    induction φ with
    | BT t =>
      simp[Filter.eval,Filter.castToAnnotatedTuple]
      rw[BoolTerm.castToAnnotatedTuple_eval]
    | Not φ ih =>
      simp[Filter.eval,Filter.castToAnnotatedTuple]
      rw[ih]
    | And φ₁ φ₂ ih₁ ih₂ =>
      simp[Filter.eval,Filter.castToAnnotatedTuple]
      rw[ih₁,ih₂]
    | Or φ₁ φ₂ ih₁ ih₂ =>
      simp[Filter.eval,Filter.castToAnnotatedTuple]
      rw[ih₁,ih₂]
    | True => trivial

def Filter.evalDecidable (φ : Filter T n) : DecidablePred φ.eval :=
  λ t => match φ with
    | Filter.BT φ      => φ.evalDecidable t
    | Filter.Not φ     => match φ.evalDecidable t with
      | isTrue h  => isFalse (by simp [Filter.eval, h])
      | isFalse h => isTrue  (by simp [Filter.eval, h])
    | Filter.And φ₁ φ₂  => match φ₁.evalDecidable t, φ₂.evalDecidable t with
      | isTrue h₁, isTrue h₂   => isTrue  (by simp [Filter.eval, h₁, h₂])
      | isFalse h, _ | _, isFalse h => isFalse (by simp [Filter.eval, h])
    | Filter.Or φ₁ φ₂   => match φ₁.evalDecidable t, φ₂.evalDecidable t with
      | isTrue h, _ | _, isTrue h => isTrue (by simp [Filter.eval, h])
      | isFalse h₁, isFalse h₂    => isFalse (by simp [Filter.eval, h₁, h₂])
    | Filter.True       => isTrue (rfl)

instance : Coe (BoolTerm T n) (Filter T n) where
  coe bt := Filter.BT bt

inductive AggFunc
| sum
deriving Repr

def addFn (a b : T) := a + b
instance : @Std.Commutative T addFn where
  comm := add_comm
instance : @Std.Associative T addFn where
  assoc := add_assoc

def AggFunc.eval (a: AggFunc) (m: Multiset T) := match a with
| sum => m.fold (addFn: T→T→T) 0

inductive Query (T: Type) : ℕ → Type
| Rel   : (n: ℕ) → String → Query T n
| Proj  : Tuple (Term T n) m → Query T n → Query T m
| Sel   : Filter T n → Query T n → Query T n
| Prod {hn: n₁+n₂=n} : Query T n₁ → Query T n₂ → Query T n
| Sum   : Query T n → Query T n → Query T n
| Dedup : Query T n → Query T n
| Diff  : Query T n → Query T n → Query T n
| Agg       : Tuple (Fin m) n₁ → Tuple (Term T m) n₂ → Tuple AggFunc n₂ → Query T m → Query T (n₁+n₂)

def Query.repr [Repr T] : Query T n → ℕ → Std.Format
| Rel _ s, p => s
| Proj ts q, p => "Π_" ++ (Repr.addAppParen (ts.repr p) p) ++ (Repr.addAppParen (q.repr p) p)
| Sel φ q, p => "σ_" ++ (Repr.addAppParen (φ.repr p) p) ++ (Repr.addAppParen (q.repr p) p)
| Prod q₁ q₂, p => Repr.addAppParen (q₁.repr p ++ "×" ++ q₂.repr p) p
| Sum q₁ q₂, p => Repr.addAppParen (q₁.repr p ++ "⊎" ++ q₂.repr p) p
| Dedup q, p => "ε" ++ Repr.addAppParen (q.repr p) p
| Diff q₁ q₂, p => Repr.addAppParen (q₁.repr p ++ "-" ++ q₂.repr p) p
| Agg is ts as q, p =>
  "γ_" ++ (Repr.addAppParen (is.repr p) p)
    ++ (Repr.addAppParen (ts.repr p) p)
    ++ (Repr.addAppParen (as.repr p) p)
    ++ (Repr.addAppParen (q.repr p) p)

instance [Repr α] : Repr (Query α n) := ⟨Query.repr⟩

def Query.noAgg (q: Query T n): Prop := match q with
| Rel   n  s  => True
| Proj  _ q   => q.noAgg
| Sel   _  q  => q.noAgg
| Prod  q₁ q₂ => q₁.noAgg ∧ q₂.noAgg
| Sum   q₁ q₂ => q₁.noAgg ∧ q₂.noAgg
| Dedup q     => q.noAgg
| Diff  q₁ q₂ => q₁.noAgg ∧ q₂.noAgg
| Agg _ _ _ q => False

def Query.noAggDecidable {T: Type} {n: ℕ}: DecidablePred (@Query.noAgg T n):=
  fun (q: Query T n) => match q with
  | Rel n s => isTrue (by simp[noAgg])
  | Proj  _ q'   => match q'.noAggDecidable with
    | isTrue h => isTrue (by simp[noAgg]; exact h)
    | isFalse h => isFalse (by simp[noAgg]; exact h)
  | Sel   _  q'  => match q'.noAggDecidable with
    | isTrue h => isTrue (by simp[noAgg]; exact h)
    | isFalse h => isFalse (by simp[noAgg]; exact h)
  | Prod  q₁ q₂ => match q₁.noAggDecidable, q₂.noAggDecidable with
    | isTrue h₁,  isTrue h₂  => isTrue (by simp[noAgg]; exact ⟨h₁,h₂⟩)
    | isFalse h₁, _          => isFalse (by simp[noAgg]; simp[h₁])
    | _,          isFalse h₂ => isFalse (by simp[noAgg]; simp[h₂])
  | Sum   q₁ q₂ => match q₁.noAggDecidable, q₂.noAggDecidable with
    | isTrue h₁,  isTrue h₂  => isTrue (by simp[noAgg]; exact ⟨h₁,h₂⟩)
    | isFalse h₁, _          => isFalse (by simp[noAgg]; simp[h₁])
    | _,          isFalse h₂ => isFalse (by simp[noAgg]; simp[h₂])
  | Dedup q'     => match q'.noAggDecidable with
    | isTrue h => isTrue (by simp[noAgg]; exact h)
    | isFalse h => isFalse (by simp[noAgg]; exact h)
  | Diff  q₁ q₂ => match q₁.noAggDecidable, q₂.noAggDecidable with
    | isTrue h₁,  isTrue h₂  => isTrue (by simp[noAgg]; exact ⟨h₁,h₂⟩)
    | isFalse h₁, _          => isFalse (by simp[noAgg]; simp[h₁])
    | _,          isFalse h₂ => isFalse (by simp[noAgg]; simp[h₂])
  | Agg _ _ _ q' => isFalse (by simp[noAgg])

instance {T: Type} {n: ℕ} : DecidablePred (@Query.noAgg T n) := Query.noAggDecidable

set_option linter.unusedSectionVars false
@[simp]
theorem Query.noAggProd {q: Query T n} :
  q.noAgg → ∀ {n₁} {q₁: Query T n₁} {q₂: Query T n₂} {hn: n₁+n₂=n}
    (_: q = @Prod T n₁ n₂ n hn q₁ q₂), q₁.noAgg ∧ q₂.noAgg  := by
    intro hna n₁ q₁ q₂ hn₁ hq
    unfold noAgg at hna
    simp[hq] at hna
    assumption

@[simp]
theorem Query.noAggSum {q: Query T n} :
  q.noAgg → ∀ {q₁: Query T n} {q₂: Query T n} (_: q = Sum q₁ q₂), q₁.noAgg ∧ q₂.noAgg  := by
    intro hna q₁ q₂ hq
    unfold noAgg at hna
    simp[hq] at hna
    assumption

@[simp]
theorem Query.noAggDiff {q: Query T n} :
  q.noAgg → ∀ {q₁: Query T n} {q₂: Query T n} (_: q = Diff q₁ q₂), q₁.noAgg ∧ q₂.noAgg  := by
    intro hna q₁ q₂ hq
    unfold noAgg at hna
    simp[hq] at hna
    assumption

@[simp]
theorem Query.noAggProj {q: Query T n} :
  q.noAgg → ∀ {m} {t} {q': Query T m} (_: q = Proj t q'), q'.noAgg := by
    intro hna m t q' hq
    unfold noAgg at hna
    rw[hq] at hna
    assumption

@[simp]
theorem Query.noAggSel {q: Query T n} :
  q.noAgg → ∀ {φ} {q': Query T n} (_: q = Sel φ q'), q'.noAgg := by
    intro hna φ q' hq
    unfold noAgg at hna
    rw[hq] at hna
    assumption

@[simp]
theorem Query.noAggDedup {q: Query T n} :
  q.noAgg → ∀ {q': Query T n} (_: q = Dedup q'), q'.noAgg := by
    intro hna q' hq
    unfold noAgg at hna
    rw[hq] at hna
    assumption

prefix:max "Π " => Query.Proj
prefix:max "σ " => Query.Sel
infix:80 " × " => Query.Prod
infix:50 " ⊎ " => Query.Sum
prefix:max "ε " => Query.Dedup
infix:50 " - " => Query.Diff
infix:1020 " ⋈ " => λ q₁ φ ↦ λ q₂ ↦ (σ φ) (q₁ × q₂)
infix:50 " ∪ " => λ q₁ q₂ ↦ ε (q₁ ⊎ q₂)

def Query.arity (_: Query T n) := n

def Query.aggdepth2_plus_depth (q: Query T n) : ℕ := match q with
| Rel   n  s  => 0
| Proj  _ q   => let d := q.aggdepth2_plus_depth; d+1
| Sel   _  q  => let d := q.aggdepth2_plus_depth; d+1
| Prod  q₁ q₂ =>
  let d₁ := q₁.aggdepth2_plus_depth
  let d₂ := q₂.aggdepth2_plus_depth
  (max d₁ d₂)+1
| Sum   q₁ q₂ =>
  let d₁ := q₁.aggdepth2_plus_depth
  let d₂ := q₂.aggdepth2_plus_depth
  (max d₁ d₂)+1
| Dedup q     => let d := q.aggdepth2_plus_depth; d+1
| Diff  q₁ q₂ =>
  let d₁ := q₁.aggdepth2_plus_depth
  let d₂ := q₂.aggdepth2_plus_depth
  (max d₁ d₂)+1
| Agg _ _ _ q => let d := q.aggdepth2_plus_depth; (d+3)

def Query.evaluate (q: Query T n) (d: Database T): Relation T n := match q with
| Rel   n  s  =>
  match d.find n s with
  | none => (∅: Multiset (Tuple T n))
  | some rn => rn
| Proj ts q => let r := evaluate q d; Multiset.map (λ t ↦ λ k ↦ (ts k).eval t) r
| Sel   φ  q  => let r := evaluate q d; @Multiset.filter _ φ.eval φ.evalDecidable r
| @Prod _ n₁ n₂ n hn q₁ q₂ =>
  let r₁ := evaluate q₁ d
  let r₂ := evaluate q₂ d
  (r₁ * r₂).cast hn
| Sum   q₁ q₂ => let r₁ := evaluate q₁ d; let r₂ := evaluate q₂ d; r₁ + r₂
| Dedup q     => let r := evaluate q d; Multiset.dedup r
| Diff  q₁ q₂ => let r₁ := evaluate q₁ d; let r₂ := evaluate q₂ d; r₁ - r₂
| @Agg _ m n₁ n₂ is ts as q =>
    let r := evaluate ε (Π (λ (k: Fin n₁) ↦ #(is k)) q) d
    let s := evaluate q d
    r.map (λ t ↦ Fin.append t (
      λ (k: Fin n₂) ↦ ((as k).eval (
        (s.filter (λ u ↦ ∀ k': Fin n₁, u (is k') = t k')).map (λ u ↦ (ts k).eval u)
      ))
    ))
termination_by q.aggdepth2_plus_depth
decreasing_by
  all_goals simp[aggdepth2_plus_depth]
  any_goals refine Nat.lt_add_one_of_le ?_
  any_goals exact Nat.le_max_left _ _
  any_goals exact Nat.le_max_right _ _