{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create simplified label files\n",
"\n",
"Instead of having to have a *labels.csv and a *metadata.csv which are matched via another numeric index, we use the clip_id for the *labels.csv files so we can match directly with the Mel spectrogram *.npz files without the need of the additional *metadata.csv file.\n",
"\n",
"(These files are used in Tutorial Part 1)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from os.path import join"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# DEFINE PATHS and FILE PATTERNS\n",
"\n",
"METADATA_PATH = 'metadata'\n",
"\n",
"# INPUT\n",
"# here, %s will be replaced by 'instrumental', 'genres' or 'moods'\n",
"LABEL_FILE_PATTERN = join(METADATA_PATH, 'ismir2018_tut_part_1_%s_labels_subset_post.csv') \n",
"META_FILE_PATTERN = join(METADATA_PATH, 'ismir2018_tut_part_1_%s_metadata_subset.csv') \n",
"\n",
"# OUTPUT\n",
"OUT_LABEL_FILE_PATTERN = join(METADATA_PATH, 'ismir2018_tut_part_1_%s_labels_subset_w_clipid.csv') "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"tasks = ['instrumental','genres','moods']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task: instrumental\n",
"Labels shape: (1680, 1)\n",
"Metadata shape: (1703, 10)\n",
"Assigning clip id from metadata to labels\n",
"Created metadata/ismir2018_tut_part_1_instrumental_labels_subset_w_clipid.csv\n",
"Task: genres\n",
"Labels shape: (1998, 8)\n",
"Metadata shape: (1998, 10)\n",
"Assigning clip id from metadata to labels\n",
"Created metadata/ismir2018_tut_part_1_genres_labels_subset_w_clipid.csv\n",
"Task: moods\n",
"Labels shape: (719, 4)\n",
"Metadata shape: (719, 10)\n",
"Assigning clip id from metadata to labels\n",
"Created metadata/ismir2018_tut_part_1_moods_labels_subset_w_clipid.csv\n"
]
}
],
"source": [
"for task in tasks:\n",
" label_file = LABEL_FILE_PATTERN % task\n",
" meta_file = META_FILE_PATTERN % task\n",
" label_file_out = OUT_LABEL_FILE_PATTERN % task\n",
"\n",
" labels = pd.read_csv(label_file, index_col=0)\n",
" metadata = pd.read_csv(meta_file, index_col=0)\n",
"\n",
" print(\"Task:\", task)\n",
" print(\"Labels shape:\", labels.shape)\n",
" print(\"Metadata shape:\", metadata.shape)\n",
" print(\"Assigning clip id from metadata to labels\")\n",
"\n",
" # replace the numeric index in labels by clip_id from metadata column 'clip_id'\n",
" clip_ids_sorted_by_index_of_labels = metadata.loc[labels.index]['clip_id']\n",
" labels.index = clip_ids_sorted_by_index_of_labels\n",
" #print(labels.head())\n",
"\n",
" # write \n",
" labels.to_csv(label_file_out)\n",
" print(\"Created \" + label_file_out)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" loud | \n",
" quiet | \n",
" soft | \n",
" strange | \n",
"
\n",
" \n",
" clip_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 30064 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 5862 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 38362 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 44901 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 16246 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" loud quiet soft strange\n",
"clip_id \n",
"30064 0 0 0 1\n",
"5862 0 0 0 0\n",
"38362 1 0 0 0\n",
"44901 0 0 0 1\n",
"16246 0 0 1 1"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# just to check\n",
"labels.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" clip_id | \n",
" mp3_path | \n",
" track_number | \n",
" title | \n",
" artist | \n",
" album | \n",
" url | \n",
" segmentStart | \n",
" segmentEnd | \n",
" original_url | \n",
"
\n",
" \n",
" \n",
" \n",
" 13708 | \n",
" 30064 | \n",
" D:/Research/Data/MIR/MagnaTagATune/mp3_full/c/... | \n",
" 7 | \n",
" -A Lake- | \n",
" LVX Nova | \n",
" LVX Nova | \n",
" http://www.magnatune.com/artists/albums/lvxnov... | \n",
" 59 | \n",
" 88 | \n",
" http://he3.magnatune.com/all/07--A%20Lake--LVX... | \n",
"
\n",
" \n",
" 2697 | \n",
" 5862 | \n",
" D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... | \n",
" 2 | \n",
" -BWV54 - II Recitative- | \n",
" American Bach Soloists | \n",
" J.S. Bach Solo Cantatas | \n",
" http://www.magnatune.com/artists/albums/abs-so... | \n",
" 30 | \n",
" 59 | \n",
" http://he3.magnatune.com/all/02--BWV54%20-%20I... | \n",
"
\n",
" \n",
" 17495 | \n",
" 38362 | \n",
" D:/Research/Data/MIR/MagnaTagATune/mp3_full/0/... | \n",
" 9 | \n",
" -Die Today- | \n",
" Rocket City Riot | \n",
" Last Of The Pleasure Seekers | \n",
" http://www.magnatune.com/artists/albums/rocket... | \n",
" 88 | \n",
" 117 | \n",
" http://he3.magnatune.com/all/09--Die%20Today--... | \n",
"
\n",
" \n",
" 20431 | \n",
" 44901 | \n",
" D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... | \n",
" 11 | \n",
" -In Gaway- | \n",
" The Headroom Project | \n",
" Jetuton Andawai | \n",
" http://www.magnatune.com/artists/albums/headro... | \n",
" 30 | \n",
" 59 | \n",
" http://he3.magnatune.com/all/11--In%20Gaway--T... | \n",
"
\n",
" \n",
" 7423 | \n",
" 16246 | \n",
" D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... | \n",
" 4 | \n",
" -Industrial Blues- | \n",
" The Headroom Project | \n",
" Jetuton Andawai | \n",
" http://www.magnatune.com/artists/albums/headro... | \n",
" 30 | \n",
" 59 | \n",
" http://he3.magnatune.com/all/04--Industrial%20... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" clip_id mp3_path \\\n",
"13708 30064 D:/Research/Data/MIR/MagnaTagATune/mp3_full/c/... \n",
"2697 5862 D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... \n",
"17495 38362 D:/Research/Data/MIR/MagnaTagATune/mp3_full/0/... \n",
"20431 44901 D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... \n",
"7423 16246 D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... \n",
"\n",
" track_number title artist \\\n",
"13708 7 -A Lake- LVX Nova \n",
"2697 2 -BWV54 - II Recitative- American Bach Soloists \n",
"17495 9 -Die Today- Rocket City Riot \n",
"20431 11 -In Gaway- The Headroom Project \n",
"7423 4 -Industrial Blues- The Headroom Project \n",
"\n",
" album \\\n",
"13708 LVX Nova \n",
"2697 J.S. Bach Solo Cantatas \n",
"17495 Last Of The Pleasure Seekers \n",
"20431 Jetuton Andawai \n",
"7423 Jetuton Andawai \n",
"\n",
" url segmentStart \\\n",
"13708 http://www.magnatune.com/artists/albums/lvxnov... 59 \n",
"2697 http://www.magnatune.com/artists/albums/abs-so... 30 \n",
"17495 http://www.magnatune.com/artists/albums/rocket... 88 \n",
"20431 http://www.magnatune.com/artists/albums/headro... 30 \n",
"7423 http://www.magnatune.com/artists/albums/headro... 30 \n",
"\n",
" segmentEnd original_url \n",
"13708 88 http://he3.magnatune.com/all/07--A%20Lake--LVX... \n",
"2697 59 http://he3.magnatune.com/all/02--BWV54%20-%20I... \n",
"17495 117 http://he3.magnatune.com/all/09--Die%20Today--... \n",
"20431 59 http://he3.magnatune.com/all/11--In%20Gaway--T... \n",
"7423 59 http://he3.magnatune.com/all/04--Industrial%20... "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# just to check\n",
"metadata.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}