{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from moviepy.editor import VideoFileClip, concatenate_videoclips" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "base_dir = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\Videos_raw\"\n", "clip1 = VideoFileClip(os.path.join(base_dir,\"1005b_part1.mp4\"))\n", "clip2 = VideoFileClip(os.path.join(base_dir,\"1005b_part2.mp4\"))\n", "final_clip = concatenate_videoclips([clip1,clip2])\n", "final_clip.write_videofile(\"1005b.mp4\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data_file = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Participants\\1105e\\FullDataset\\Final_data_1105e.pkl\"\n", "data = pd.read_pickle(data_file)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def windows(data, size,factor=2):\n", " start = 0\n", " while start + (size / factor) < len(data):\n", " yield int(start), int(start + size)\n", " start += (size / factor)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def to_seconds(time,factor = 1000):\n", " seconds = time/factor\n", " return round(seconds,2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_pictures_dataset(data,user,window_size,factor =1):\n", " df_pictures = {\"id\":[],\"target\":[],\"start\":[],\"end\":[]}\n", " \n", " for idx,(start, end) in enumerate(windows(data.index, window_size,factor)):\n", " df_pictures[\"id\"].append(f\"{user}_{idx}\")\n", " df_pictures[\"target\"].append(data.loc[start:end,\"picture\"].mode()[0])\n", " df_pictures[\"start\"].append(to_seconds(data.loc[start,\"Recording timestamp\"]))\n", " df_pictures[\"end\"].append(to_seconds(data.loc[end,\"Recording timestamp\"]))\n", " \n", " return pd.DataFrame.from_dict(df_pictures).set_index(\"id\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "user = \"1105e\"\n", "test = get_pictures_dataset(data,user,500)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def filter_nulls(df):\n", " #get samples of majoritary class different than null\n", " samples_max=max(test[test[\"target\"]!=\"Null\"]\n", " .target \\\n", " .value_counts() ) \n", " #get id for null and rest of labels\n", " null_idx=df[df[\"target\"]==\"Null\"].index\n", " samples_null = len(null_idx)\n", " if(samples_null>samples_max):\n", " #perform random undersampling\n", " rest_idx = df[df[\"target\"]!=\"Null\"].index\n", " # sample a subset of id from null samples to match majoritary class samples\n", " selected_idx = np.random.choice(range(samples_null), size=samples_max, replace=False)\n", " null_idx=null_idx[selected_idx]\n", " #filter df \n", " df = df.loc[null_idx|rest_idx]\n", "\n", " return df\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df= filter_nulls(test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Video" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from moviepy.editor import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path_video = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\Videos_raw\\1105e.mp4\"\n", "video = VideoFileClip(path_video).resize((224,224))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = df.reset_index()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "video_dataset = {\n", " \"video_path\": [],\n", " \"label\": [],\n", " \"frames\": []\n", "}\n", "root_path = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Participants\"\n", "video_folder = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Participants\\1105e\\Clips\"\n", "participant =\"1105e\"\n", "if not(os.path.isdir(video_folder)):\n", " os.makedirs(video_folder)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for idx in tqdm(df.index):\n", " #get clip\n", " clip = video.subclip(df.loc[idx,\"start\"],df.loc[idx,\"end\"])\n", " n_frames = clip.duration * clip.fps\n", " label = df.loc[idx,\"target\"]\n", " # Write the result to a file \n", " clip_id = df.loc[idx,\"id\"]\n", " video_rel_path = os.path.join(participant,\"Clips\",f\"{clip_id}.mp4\")\n", " clip_file= os.path.join(root_path,video_rel_path)\n", " clip.write_videofile(clip_file,audio=False,logger = None)\n", " #update dataset\n", " video_dataset[\"video_path\"].append(video_rel_path)\n", " video_dataset[\"label\"].append(label)\n", " video_dataset[\"frames\"].append(n_frames)\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "video_dataset" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "media_path = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\ProjectData\\Media\"\n", "participants_path = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\ProjectData\\Participants\"\n", "recordings_path = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\ProjectData\\Recordings\"" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import xml.etree.ElementTree as ET\n", "def get_media_keys(xml_file):\n", " tree = ET.parse(xml_file)\n", " root = tree.getroot()\n", " key = root.find(\"Key\").text\n", " filename = root.find(\"TargetFileName\").text\n", " return key,filename\n", "\n", "def get_participant_keys(xml_file):\n", " tree = ET.parse(xml_file)\n", " root = tree.getroot()\n", " key = root.find(\"Key\").text\n", " participant = root.find(\"Name\").text\n", " return key,participant\n", "\n", "def get_recording_keys(xml_file):\n", " tree = ET.parse(xml_file)\n", " participant_id = next(elem.text for elem in tree.iter() if \"ParticipantId\" in elem.tag)\n", " media_id = next(elem.text for elem in tree.iter() if \"guid\" in elem.tag)\n", " return participant_id, media_id" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "test = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\ProjectData\\Recordings\\BqQksdKnyUK4EYy9bga5Yg.rec\"\n", "tree = ET.parse(test)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dcd22fbf-4ea2-49cf-90da-463d9e49e9ae\n" ] } ], "source": [ "for el in tree.iter():\n", " if(\"guid\" in el.tag):\n", " print(el.text)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "#define key mappings\n", "recording_files = [os.path.join(recordings_path,file) for file in os.listdir(recordings_path) if \"rec\" in file]\n", "recordings = {}\n", "for idx,file in enumerate(recording_files):\n", " participant_id, media_id = get_recording_keys(file)\n", " recordings[idx] = {participant_id: \"\" , media_id: \"\"}\n", " " ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "x = recordings[1].keys()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_keys(['2872c273-20e0-4ed3-a3bf-1908a208a760', 'd0dcbb3f-3360-4233-b1e8-9da4a85c3257'])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'2872c273-20e0-4ed3-a3bf-1908a208a760' in x" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "for file in os.listdir(participants_path):\n", " file = os.path.join(participants_path,file)\n", " # get participant key\n", " participant_key,participant_name = get_participant_keys(file)\n", " for key in recordings.keys():\n", " if (participant_key in recordings[key].keys()):\n", " recordings[key][participant_key] = participant_name\n", " " ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "media_files = [os.path.join(media_path,file) for file in os.listdir(media_path)\n", " if \"xml\" in file]\n", "for file in media_files:\n", " # get participant key\n", " media_key,recording_filename = get_media_keys(file)\n", " for key in recordings.keys():\n", " if (media_key in recordings[key].keys()):\n", " recordings[key][media_key] = recording_filename\n", " " ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "import json\n", "\n", "recordings_metadata_file = r\"C:\\Users\\jeuux\\Desktop\\Carrera\\MoAI\\TFM\\AnnotatedData\\FinalDatasets\\Datasets\\Video_Dataset_meta\\recordings_meta.json\"\n", "with open(recordings_metadata_file,\"w\") as f:\n", " json.dump(recordings,f)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "\n", "# participant_keys = [get_hash_root(os.path.join(xml_dir,xml_file)) for xml_file in os.listdir(participants_path)]\n", "media_keys = [get_hash_root(os.path.join(media_path,xml_file))\n", " for xml_file in os.listdir(media_path)\n", " if \"xml\" in xml_file]\n", "participant_keys = {}\n", "\n", "for xml_file in os.listdir(participants_path):\n", " if(\"rec\" in xml_file):\n", " key,participant = get_key_name(os.path.join(participants_path,xml_file))\n", " participant_keys[key] = participant\n", " \n", "participant_ids = [get_participant_id(os.path.join(recordings_path,xml_file))for xml_file in os.listdir(recordings_path)\n", " if \"rec\" in xml_file]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hi\n" ] } ], "source": [ "ext_rec_id = \"b27628c4-2e7f-4e8e-8dd3-d933d165f04e\"\n", "key = \"b124a406-a7d2-42c9-b811-8cbd6e06b962\"\n", "media_dp1 = \"dcd22fbf-4ea2-49cf-90da-463d9e49e9ae\"\n", "for keys in media_keys:\n", " if(media_dp1==keys):\n", " print(\"Hi\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "media_keys.values()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "xml_file = os.path.join(participants_path,xml_file)\n", "tree = ET.parse(xml_file)\n", "root = tree.getroot()\n", "key = root.find(\"Key\").text\n", "participant = root.find(\"Name\").text\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "key" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for e in elem.iter():\n", " print(e)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tree.getroot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for keys in participant_ids:\n", " try:\n", " next(media_key for media_key in media_keys if media_key==key)\n", " print (\"Match!\")\n", " except:\n", " print(\"there is not any match\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(participant_keys)==len(media_keys)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(test_key in participant_keys)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "def find_recording(recordings_path,hash_root):\n", " for file in os.listdir(recordings_path):\n", " if(\"mp4\" in file):\n", " root = file.split(\"==\")[0]\n", " if(root==hash_root):\n", " print(root,hash_root)\n", " return True " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for xml_file in os.listdir(xml_dir):\n", " hash_root = get_hash_root(os.path.join(xml_dir,xml_file))\n", " find_recording(recordings_path,hash_root)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_hash = \"4KpMREZBURFy2YS6VpeYYw\"\n", "for file in os.listdir(xml_dir):\n", " print(get_file_root(file) in test_hash)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_file_root(path):\n", " base=os.path.basename(path)\n", " return os.path.splitext(base)[0]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }