{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1) Postprocess the instrumental/vocal label file\n",
"\n",
"We reduce the two columns `instrumental` and `singing` to a single label column. Before that, we check for and remove ambiguous entries."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from os.path import join"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# DEFINE PATHS and FILE NAMES\n",
"\n",
"METADATA_PATH = 'metadata'\n",
"\n",
"IN_LABEL_FILE = join(METADATA_PATH, 'ismir2018_tut_part_1_instrumental_labels_subset.csv') \n",
"\n",
"OUT_LABEL_FILE = join(METADATA_PATH, 'ismir2018_tut_part_1_instrumental_labels_subset_post.csv') "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" instrumental singing\n",
"14 0.0 1.0\n",
"17 0.0 1.0\n",
"88 1.0 0.0\n",
"103 0.0 1.0\n",
"128 0.0 1.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"labels = pd.read_csv(IN_LABEL_FILE, index_col=0) #, sep='\\t')\n",
"labels.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"instrumental 443.0\n",
"singing 1283.0\n",
"dtype: float64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check class distribution\n",
"labels.sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we already removed the tracks that were *neither* instrumental *nor* vocal\n",
"labels.sum(axis=1).min()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# unfortunately a few songs are labeled *both* instrumental *and* vocal\n",
"labels.sum(axis=1).max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We remove the ambiguously annotated tracks by using XOR, keeping only tracks that are *either* instrumental *or* vocal, but not both."
]
},
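{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Added illustration (not part of the original tutorial data):* a minimal sketch of what `np.logical_xor` does on a few made-up label rows."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy illustration on made-up rows (not the real label data): XOR is True only\n",
"# where exactly one of the two labels is set, i.e. for unambiguous tracks.\n",
"demo = pd.DataFrame({'instrumental': [1, 0, 1, 0], 'singing': [0, 1, 1, 0]})\n",
"np.logical_xor(demo['instrumental'], demo['singing'])"
]
},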
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14 True\n",
"17 True\n",
"88 True\n",
"103 True\n",
"128 True\n",
"Name: instrumental, dtype: bool"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retain = np.logical_xor(labels['instrumental'], labels['singing'])\n",
"retain.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"For instrumental vs. vocal, from originally 1703 input examples, we can only retain 1680 trusted ones in our groundtruth\n"
]
}
],
"source": [
"# keep only ones that are set \"True\" in retain\n",
"n_orig = len(labels)\n",
"n_retain = sum(retain)\n",
"\n",
"labels = labels[retain]\n",
"\n",
"print(\"For instrumental vs. vocal, from originally\", n_orig, \"input examples, we can only retain\",n_retain, \"trusted ones in our groundtruth\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"instrumental 420.0\n",
"singing 1260.0\n",
"dtype: float64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check class distribution after removal of ambiguous tracks\n",
"labels.sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" instrumental singing\n",
"14 0.0 1.0\n",
"17 0.0 1.0\n",
"88 1.0 0.0\n",
"103 0.0 1.0\n",
"128 0.0 1.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"labels.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" instrumental\n",
"14 0.0\n",
"17 0.0\n",
"88 1.0\n",
"103 0.0\n",
"128 0.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# keep only one column, since the two are now redundant (one is the inverse of the other)\n",
"labels = labels[['instrumental']]\n",
"labels.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"instrumental 420.0\n",
"dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# double-check number of instrumental tracks\n",
"labels.sum()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wrote metadata/ismir2018_tut_part_1_instrumental_labels_subset_post.csv\n"
]
}
],
"source": [
"# export file under new filename\n",
"labels.to_csv(OUT_LABEL_FILE)\n",
"print(\"Wrote \" + OUT_LABEL_FILE)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2) Postprocess the moods label file\n",
"\n",
"We retain only moods with a certain minimum number of instances."
]
},
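{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Added illustration:* a minimal sketch of the column-filtering idiom used in this section, shown on a made-up two-column frame rather than the real mood labels."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy example (not the real mood data): keep only columns whose instance count\n",
"# reaches a threshold, via a boolean mask over the column sums.\n",
"toy = pd.DataFrame({'rare': [1, 0, 0], 'common': [1, 1, 1]})\n",
"toy.columns[toy.sum() >= 2]"
]
},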
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# DEFINE PATHS and FILE NAMES\n",
"\n",
"METADATA_PATH = 'metadata'\n",
"\n",
"IN_LABEL_FILE = join(METADATA_PATH, 'ismir2018_tut_part_1_moods_labels_subset.csv') \n",
"\n",
"OUT_LABEL_FILE = join(METADATA_PATH, 'ismir2018_tut_part_1_moods_labels_subset_post.csv') \n",
"\n",
"MIN_MOODS = 100"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" airy calm dark deep different eerie happy light loud low \\\n",
"13708 0 0 0 0 0 0 0 0 0 0 \n",
"2697 0 0 0 0 0 0 0 0 0 0 \n",
"17495 0 0 0 0 0 0 0 0 1 0 \n",
"20431 0 0 0 0 0 0 0 0 0 0 \n",
"7423 0 1 0 0 0 0 0 0 0 0 \n",
"\n",
" mellow quiet sad scary soft strange \n",
"13708 0 0 0 0 0 1 \n",
"2697 0 0 1 0 0 0 \n",
"17495 0 0 0 0 0 0 \n",
"20431 0 0 0 0 0 1 \n",
"7423 0 0 0 0 1 1 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metadata = pd.read_csv(IN_LABEL_FILE, index_col=0) #, sep='\\t')\n",
"metadata.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"airy 7\n",
"calm 25\n",
"dark 37\n",
"deep 17\n",
"different 23\n",
"eerie 14\n",
"happy 9\n",
"light 12\n",
"loud 209\n",
"low 21\n",
"mellow 16\n",
"quiet 177\n",
"sad 14\n",
"scary 9\n",
"soft 200\n",
"strange 120\n",
"dtype: int64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# how many tracks per mood\n",
"metadata.sum()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"airy False\n",
"calm False\n",
"dark False\n",
"deep False\n",
"different False\n",
"eerie False\n",
"happy False\n",
"light False\n",
"loud True\n",
"low False\n",
"mellow False\n",
"quiet True\n",
"sad False\n",
"scary False\n",
"soft True\n",
"strange True\n",
"dtype: bool"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metadata.sum() >= MIN_MOODS"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['loud', 'quiet', 'soft', 'strange'], dtype='object')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols_retain = metadata.columns[(metadata.sum() >= MIN_MOODS)]\n",
"cols_retain"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" loud quiet soft strange\n",
"13708 0 0 0 1\n",
"2697 0 0 0 0\n",
"17495 1 0 0 0\n",
"20431 0 0 0 1\n",
"7423 0 0 1 1"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metadata = metadata[cols_retain]\n",
"metadata.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# info: maximum number of concurrent moods\n",
"metadata.sum(axis=1).max()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wrote metadata/ismir2018_tut_part_1_moods_labels_subset_post.csv\n"
]
}
],
"source": [
"# export file under new filename\n",
"metadata.to_csv(OUT_LABEL_FILE)\n",
"print(\"Wrote \" + OUT_LABEL_FILE)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3) Genre label file\n",
"\n",
"No postprocessing is needed. For filename compatibility, we simply copied the genre labels file to genres_post.csv; a sketch of that copy step is shown below."
]
}
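,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Added sketch:* the copy was done outside this notebook originally; the cell below shows one way to do it in code. The exact genre file names are assumptions that mirror the naming scheme of the other label files above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch only: copy the genre label file under the *_post.csv naming convention.\n",
"# The input/output file names are assumed, mirroring the files used above.\n",
"import shutil\n",
"from os.path import exists\n",
"\n",
"GENRE_IN_FILE = join(METADATA_PATH, 'ismir2018_tut_part_1_genres_labels_subset.csv')\n",
"GENRE_OUT_FILE = join(METADATA_PATH, 'ismir2018_tut_part_1_genres_labels_subset_post.csv')\n",
"\n",
"if exists(GENRE_IN_FILE):\n",
"    shutil.copy(GENRE_IN_FILE, GENRE_OUT_FILE)"
]
}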
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}