{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_file_stem(path):\n",
    "    base=os.path.basename(path)\n",
    "    return os.path.splitext(base)[0]\n",
    "\n",
    "def read_metadata(df_path):  \n",
    "    #read df\n",
    "    df = pd.read_csv(df_path,sep=\" \",header= None)\n",
    "    df.columns = [\"video_path\",\"frames\",\"label\"]\n",
    "    return df\n",
    "\n",
    "def df_to_txt(df,dir_path):\n",
    "    df.to_csv(dir_path, header=None, index=None, sep=' ', mode='a')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "file = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Datasets\\HAR_Video\\Base_Dataset\\Train_encodded.txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = read_metadata(file)\n",
    "freq = df.label.value_counts(normalize=True)\n",
    "weights  = np.empty(len(freq))\n",
    "for idx,class_freq in zip(freq.index,freq.values):\n",
    "    weights[idx]  = 1/class_freq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([21, 2, 6, 11, 20, 3, 0, 7, 9, 1, 19, 5, 8, 16, 10, 15, 12, 17, 13,\n",
       "            4, 18, 14],\n",
       "           dtype='int64')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "freq.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5.73450568e-01, 1.04010294e-01, 5.23268282e-02, 3.51704911e-02,\n",
       "       3.51704911e-02, 3.19536779e-02, 3.06669526e-02, 2.42333262e-02,\n",
       "       2.23032383e-02, 1.88719708e-02, 1.35106155e-02, 1.30817071e-02,\n",
       "       1.13660733e-02, 8.57816856e-03, 6.21917221e-03, 5.36135535e-03,\n",
       "       4.07463007e-03, 2.78790478e-03, 2.57345057e-03, 2.14454214e-03,\n",
       "       1.71563371e-03, 4.28908428e-04])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "freq.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3.26083916e+01, 5.29886364e+01, 9.61443299e+00, 3.12953020e+01,\n",
       "       4.66300000e+02, 7.64426230e+01, 1.91106557e+01, 4.12654867e+01,\n",
       "       8.79811321e+01, 4.48365385e+01, 1.60793103e+02, 2.84329268e+01,\n",
       "       2.45421053e+02, 3.88583333e+02, 2.33150000e+03, 1.86520000e+02,\n",
       "       1.16575000e+02, 3.58692308e+02, 5.82875000e+02, 7.40158730e+01,\n",
       "       2.84329268e+01, 1.74382947e+00])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "encoder_file = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Datasets\\HAR_dataset_v1\\encoder_train.pkl\"\n",
    "encoder_file_2  =r\"C:\\Users\\jeuux\\Downloads\\encoder_train (1).pkl\"\n",
    "encoder = joblib.load(encoder_file_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "22"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(encoder.classes_)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}