{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os \n",
    "import numpy as np\n",
    "import shutil\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import StratifiedKFold,StratifiedShuffleSplit\n",
    "from decord import VideoReader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_file_stem(path):\n",
    "    \"\"\"Return the file name of ``path`` without its directory and last extension.\n",
    "\n",
    "    Only the final extension is removed, so ``\"dir/clip.tar.gz\"`` gives ``\"clip.tar\"``.\n",
    "    \"\"\"\n",
    "    base = os.path.basename(path)\n",
    "    return os.path.splitext(base)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder whose entries are treated as participant ids by the cells below.\n",
    "# NOTE(review): machine-specific absolute path -- adjust it to the local copy of the data.\n",
    "root_path = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Participants\"\n",
    "# os.listdir gives no ordering guarantee; sorting keeps the participant order reproducible.\n",
    "participants = sorted(os.listdir(root_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Participant ids that are left out when final_participants is built.\n",
    "# NOTE(review): presumably these were handled in an earlier run -- confirm.\n",
    "processed_participants = [\n",
    "    '1205a',\n",
    "    '0205b',\n",
    "    '0605b',\n",
    "    '2504g',\n",
    "    '020419c',\n",
    "    '0705b',\n",
    "    '2504d',\n",
    "    '020419f',\n",
    "    '0905a',\n",
    "    '2204c',\n",
    "    '0205e',\n",
    "    '010419c',\n",
    "    '1205b',\n",
    "    '2504e',\n",
    "    '0404b',\n",
    "    '1105a',\n",
    "    '0905b',\n",
    "    '2304b',\n",
    "    '1105d',\n",
    "    '2604a',\n",
    "    '1105c',\n",
    "    '1005c',\n",
    "    '1005b',\n",
    "    '1105e',\n",
    "    '2504c',\n",
    "    '020419e',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep every participant that is not in processed_participants, preserving order.\n",
    "final_participants = [\n",
    "    participant for participant in participants\n",
    "    if participant not in processed_participants\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Destination root for the derived dataset.\n",
    "# NOTE(review): no active code in the visible part of the notebook reads this value.\n",
    "new_root = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Datasets\\Video_Dartaset_Pictures\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
"100%|██████████████████████████████████████████████████████████████████████████████████| 48/48 [00:01<00:00, 42.16it/s]\n"
     ]
    }
   ],
   "source": [
    "# Gather the clip metadata of every participant that has video clips.\n",
    "video_folder = \"ArtworkClips\"\n",
    "\n",
    "dfs = {\"video_path\": [], \"label\": [], \"frames\": []}\n",
    "for participant in tqdm(final_participants):\n",
    "    cd = os.path.join(root_path, participant)\n",
    "\n",
    "    # isdir (instead of `video_folder in os.listdir(cd)`) also skips stray files\n",
    "    # inside root_path, for which os.listdir(cd) would raise and abort the loop.\n",
    "    if not os.path.isdir(os.path.join(cd, video_folder)):\n",
    "        continue\n",
    "\n",
    "    # The disabled step that copied the clips into new_root used to live here.\n",
    "    df_path = os.path.join(cd, \"FullDataset\", f\"video_dataset_{participant}.txt\")\n",
    "    try:\n",
    "        # The .txt file is parsed as JSON; `with` closes the handle right away.\n",
    "        with open(df_path) as metadata_file:\n",
    "            metadata = json.load(metadata_file)\n",
    "        # Look up every column before touching dfs: a missing key must not leave\n",
    "        # the per-key lists with different lengths.\n",
    "        columns = {key: metadata[key] for key in dfs}\n",
    "    except (OSError, ValueError, KeyError, TypeError) as error:\n",
    "        # Best effort: report the participant and the reason, then carry on.\n",
    "        print(f\"{participant} not processed ({type(error).__name__}: {error})\")\n",
    "        continue\n",
    "\n",
    "    for key, values in columns.items():\n",
    "        dfs[key].append(values)\n",
    "\n",
    "# Flatten the per-participant lists into one array per column.\n",
    "for key in dfs:\n",
    "    # np.concatenate raises on an empty sequence, so fall back to an empty array.\n",
    "    dfs[key] = np.concatenate(dfs[key]) if dfs[key] else np.array([], dtype=object)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the clip table: one column per metadata key collected above.\n",
    "df = pd.DataFrame.from_dict(dfs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The part of video_path before the first \"/\" is stored as the participant id.\n",
    "df[\"participant\"] = df[\"video_path\"].apply(lambda path: path.split(\"/\")[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
| \n", " | video_path | \n", "label | \n", "frames | \n", "participant | \n", "
|---|---|---|---|---|
| 0 | \n", "010419e/ArtworkClips/010419e_10.mp4 | \n", "15 | \n", "280 | \n", "010419e | \n", "
| 1 | \n", "010419e/ArtworkClips/010419e_11.mp4 | \n", "4 | \n", "341 | \n", "010419e | \n", "
| 2 | \n", "010419e/ArtworkClips/010419e_12.mp4 | \n", "4 | \n", "347 | \n", "010419e | \n", "
| 3 | \n", "010419e/ArtworkClips/010419e_14.mp4 | \n", "5 | \n", "295 | \n", "010419e | \n", "
| 4 | \n", "010419e/ArtworkClips/010419e_17.mp4 | \n", "0 | \n", "354 | \n", "010419e | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 2030 | \n", "3004e/ArtworkClips/3004e_93.mp4 | \n", "16 | \n", "292 | \n", "3004e | \n", "
| 2031 | \n", "3004e/ArtworkClips/3004e_94.mp4 | \n", "16 | \n", "278 | \n", "3004e | \n", "
| 2032 | \n", "3004e/ArtworkClips/3004e_95.mp4 | \n", "16 | \n", "287 | \n", "3004e | \n", "
| 2033 | \n", "3004e/ArtworkClips/3004e_96.mp4 | \n", "0 | \n", "295 | \n", "3004e | \n", "
| 2034 | \n", "3004e/ArtworkClips/3004e_98.mp4 | \n", "16 | \n", "289 | \n", "3004e | \n", "
2035 rows × 4 columns
\n", "