{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Create simplified label files\n", "\n", "The original data comes as a `*labels.csv` and a `*metadata.csv` file per task, which have to be matched via an additional numeric index. Here we replace that numeric index in the `*labels.csv` files by the `clip_id`, so the labels can be matched directly with the Mel spectrogram `*.npz` files without needing the additional `*metadata.csv` file.\n", "\n", "(These files are used in Tutorial Part 1)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from os.path import join" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# PATHS and FILE NAME TEMPLATES\n", "\n", "METADATA_PATH = 'metadata'\n", "\n", "\n", "def _in_metadata_dir(file_name):\n", "    \"\"\"Return file_name prefixed with METADATA_PATH.\"\"\"\n", "    return join(METADATA_PATH, file_name)\n", "\n", "\n", "# In every template, %s is the slot for the task name:\n", "# 'instrumental', 'genres' or 'moods'\n", "\n", "# INPUT: labels and metadata per task\n", "LABEL_FILE_PATTERN = _in_metadata_dir('ismir2018_tut_part_1_%s_labels_subset_post.csv')\n", "META_FILE_PATTERN = _in_metadata_dir('ismir2018_tut_part_1_%s_metadata_subset.csv')\n", "\n", "# OUTPUT: labels per task, indexed by clip id\n", "OUT_LABEL_FILE_PATTERN = _in_metadata_dir('ismir2018_tut_part_1_%s_labels_subset_w_clipid.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# task names, filled into the %s slot of the file patterns\n", "tasks = ['instrumental', 'genres', 'moods']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Task: instrumental\n", "Labels shape: (1680, 1)\n", "Metadata shape: (1703, 10)\n", "Assigning clip id from metadata to labels\n", "Created metadata/ismir2018_tut_part_1_instrumental_labels_subset_w_clipid.csv\n", "Task: genres\n", "Labels shape: (1998, 8)\n", "Metadata shape: (1998, 10)\n", "Assigning clip id from metadata to labels\n", "Created metadata/ismir2018_tut_part_1_genres_labels_subset_w_clipid.csv\n", "Task: moods\n", "Labels shape: (719, 4)\n", "Metadata shape: (719, 10)\n", "Assigning clip id from metadata to labels\n", "Created 
metadata/ismir2018_tut_part_1_moods_labels_subset_w_clipid.csv\n" ] } ], "source": [ "for task in tasks:\n", "    label_file = LABEL_FILE_PATTERN % task\n", "    meta_file = META_FILE_PATTERN % task\n", "    label_file_out = OUT_LABEL_FILE_PATTERN % task\n", "\n", "    # both CSV files are read with their first column as (numeric) index\n", "    labels = pd.read_csv(label_file, index_col=0)\n", "    metadata = pd.read_csv(meta_file, index_col=0)\n", "\n", "    print(\"Task:\", task)\n", "    print(\"Labels shape:\", labels.shape)\n", "    print(\"Metadata shape:\", metadata.shape)\n", "    print(\"Assigning clip id from metadata to labels\")\n", "\n", "    # fail loudly if a label row has no metadata row: depending on the pandas version,\n", "    # the lookup below would otherwise raise a less helpful error or yield NaN clip ids\n", "    missing = labels.index.difference(metadata.index)\n", "    if len(missing) > 0:\n", "        raise KeyError(\"%d label row(s) of task '%s' have no metadata entry, e.g. index %s\"\n", "                       % (len(missing), task, list(missing[:5])))\n", "\n", "    # replace the numeric index in labels by clip_id from metadata column 'clip_id';\n", "    # a single .loc call picks rows (in label order) and the column at once\n", "    clip_ids_sorted_by_index_of_labels = metadata.loc[labels.index, 'clip_id']\n", "    labels.index = clip_ids_sorted_by_index_of_labels\n", "\n", "    # write labels, now indexed by clip_id\n", "    labels.to_csv(label_file_out)\n", "    print(\"Created \" + label_file_out)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loudquietsoftstrange
clip_id
300640001
58620000
383621000
449010001
162460011
\n", "
" ], "text/plain": [ " loud quiet soft strange\n", "clip_id \n", "30064 0 0 0 1\n", "5862 0 0 0 0\n", "38362 1 0 0 0\n", "44901 0 0 0 1\n", "16246 0 0 1 1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sanity check: `labels` still holds the frame of the last task from the loop above;\n", "# its index should now be the clip_id taken from the metadata.\n", "labels.head(n=5)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
clip_idmp3_pathtrack_numbertitleartistalbumurlsegmentStartsegmentEndoriginal_url
1370830064D:/Research/Data/MIR/MagnaTagATune/mp3_full/c/...7-A Lake-LVX NovaLVX Novahttp://www.magnatune.com/artists/albums/lvxnov...5988http://he3.magnatune.com/all/07--A%20Lake--LVX...
26975862D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/...2-BWV54 - II Recitative-American Bach SoloistsJ.S. Bach Solo Cantatashttp://www.magnatune.com/artists/albums/abs-so...3059http://he3.magnatune.com/all/02--BWV54%20-%20I...
1749538362D:/Research/Data/MIR/MagnaTagATune/mp3_full/0/...9-Die Today-Rocket City RiotLast Of The Pleasure Seekershttp://www.magnatune.com/artists/albums/rocket...88117http://he3.magnatune.com/all/09--Die%20Today--...
2043144901D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/...11-In Gaway-The Headroom ProjectJetuton Andawaihttp://www.magnatune.com/artists/albums/headro...3059http://he3.magnatune.com/all/11--In%20Gaway--T...
742316246D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/...4-Industrial Blues-The Headroom ProjectJetuton Andawaihttp://www.magnatune.com/artists/albums/headro...3059http://he3.magnatune.com/all/04--Industrial%20...
\n", "
" ], "text/plain": [ " clip_id mp3_path \\\n", "13708 30064 D:/Research/Data/MIR/MagnaTagATune/mp3_full/c/... \n", "2697 5862 D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... \n", "17495 38362 D:/Research/Data/MIR/MagnaTagATune/mp3_full/0/... \n", "20431 44901 D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... \n", "7423 16246 D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... \n", "\n", " track_number title artist \\\n", "13708 7 -A Lake- LVX Nova \n", "2697 2 -BWV54 - II Recitative- American Bach Soloists \n", "17495 9 -Die Today- Rocket City Riot \n", "20431 11 -In Gaway- The Headroom Project \n", "7423 4 -Industrial Blues- The Headroom Project \n", "\n", " album \\\n", "13708 LVX Nova \n", "2697 J.S. Bach Solo Cantatas \n", "17495 Last Of The Pleasure Seekers \n", "20431 Jetuton Andawai \n", "7423 Jetuton Andawai \n", "\n", " url segmentStart \\\n", "13708 http://www.magnatune.com/artists/albums/lvxnov... 59 \n", "2697 http://www.magnatune.com/artists/albums/abs-so... 30 \n", "17495 http://www.magnatune.com/artists/albums/rocket... 88 \n", "20431 http://www.magnatune.com/artists/albums/headro... 30 \n", "7423 http://www.magnatune.com/artists/albums/headro... 30 \n", "\n", " segmentEnd original_url \n", "13708 88 http://he3.magnatune.com/all/07--A%20Lake--LVX... \n", "2697 59 http://he3.magnatune.com/all/02--BWV54%20-%20I... \n", "17495 117 http://he3.magnatune.com/all/09--Die%20Today--... \n", "20431 59 http://he3.magnatune.com/all/11--In%20Gaway--T... \n", "7423 59 http://he3.magnatune.com/all/04--Industrial%20... 
" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sanity check: metadata of the last task from the loop above. Its index and its\n", "# 'clip_id' column are what the label index was translated with.\n", "metadata.head(n=5)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }