{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# XML cutter and midi formatter \n", "This script is prepared to parse a folder full of xml files, cut them to a certain length , *if desired* (according to a specific measure criteria) and output then a midi file with thiss new length.\n", "\n", "Afterwards, we are able to process these new midi files by using jSymbolic GUI if we want to include these features in our analyses. This notebook allso provides code for that purpose.\n", "\n", "To install `musif`:\n", "1. [Download](https://raw.githubusercontent.com/DIDONEproject/musif/main/docs/source/Cutter_and_midi_extractor.ipynb) this notebook.\n", "2. Start `jupyter` in your Anaconda environment.\n", "3. Open this script.\n", "4. Run the following cell by clicking on it and pressing Ctrl+Enter.\n", "\n", "https://musescore.org/es/download\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%pip install musif" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import subprocess\n", "import sys\n", "from math import floor\n", "from os import path\n", "from pathlib import Path\n", "\n", "from music21.stream.base import Measure, Score\n", "from musif.common._utils import read_dicts_from_csv\n", "\n", "import musif.extract.constants as C\n", "from musif.extract.extract import parse_filename\n", "from musif.logs import perr, pinfo, pwarn\n", "\n", "sys.path.append(os.path.abspath('.'))\n", "from feature_extraction.custom_conf import CustomConf\n", "from musif.config import ExtractConfiguration" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "\n", "\n", "class CustomConf(ExtractConfiguration):\n", " def __init__(self, *args, **kwargs):\n", " super().__init__(*args, **kwargs)\n", " self._load_metadata()\n", "\n", " def _load_metadata(self) -> None:\n", " self.scores_metadata = {\n", " path.basename(file): read_dicts_from_csv(file)\n", " for file in glob(path.join(self.metadata_dir, \"*.csv\")) # type: ignore\n", " }\n", " if not self.scores_metadata:\n", " print(\n", " \"\\nMetadata could not be loaded properly!! Check metadata path in config file.\\n\"\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define specific functions for our script" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def cut_by_measures_by_measure(cfg, data):\n", " score: Score = data[C.DATA_SCORE]\n", " last_measure = 1000000\n", " for metadata in cfg.scores_metadata[last_measure]:\n", " if metadata[\"FileName\"] == data[\"FileName\"]:\n", " last_measure = floor(float(metadata.get(cfg.end_of_theme_a, last_measure)))\n", " if last_measure == 0:\n", " name = data['file'].name\n", " pwarn(f'Last measure for {name} fil was found to be 0! Remember to update metadata before extraction ;) Setting last measure to the end of the score.\\n')\n", " last_measure = 1000000\n", " break\n", "\n", " remove_everything_after_measure(score, last_measure)\n", "\n", "def remove_everything_after_measure(score, last_measure):\n", " for part in score.parts:\n", " read_measures = 0\n", " elements_to_remove = []\n", " for measure in part.getElementsByClass(Measure):\n", " read_measures += 1\n", " if read_measures > last_measure:\n", " elements_to_remove.append(measure)\n", " part.remove(targetOrList=elements_to_remove)\n", "\n", "def save_xml(data, new_filename):\n", " new_filename = str(new_filename) + '.xml'\n", " data[C.DATA_SCORE].write('musicxml', fp=f'{new_filename}')\n", " \n", "def save_to_midi(filename):\n", " filename = str(filename)\n", " new_filename = filename + '.mid'\n", " if path.exists(new_filename):\n", " pinfo(f\"{filename} already exists as MIDI, skipping it!\")\n", " return\n", " cmd = [\"mscore\", \"-fo\", new_filename, filename + '.xml']\n", " pinfo(f\"Converting {filename} to MIDI\")\n", " try:\n", " subprocess.run(\n", " cmd,\n", " stdout=subprocess.DEVNULL,\n", " timeout=120,\n", " )\n", " except subprocess.TimeoutExpired:\n", " pwarn(\n", " f\"Continuing because time expired for file {filename}! Try running:\\n\"\n", " + \"\".join(cmd)\n", " ) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from musif.config import ExtractConfiguration\n", "\n", "custom_config = \"config.yml\"\n", "\n", "cfg = CustomConf(\n", " None,\n", " metadata_dir = \"your/metadata/dir\"\n", " expand_repeats = False,\n", " remove_unpitched_objects = True)\n", "data_path = 'data/xml/'\n", "data_path_cutted = Path('data/xml/cutted_themeA/')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for filename in sorted(Path(data_path).glob(f\"*.xml\")):\n", " data = {}\n", " new_filename = data_path_cutted / Path(filename.stem + '_cutted')\n", " if path.exists(str(new_filename) + '.xml'):\n", " pinfo(f\"{filename} already exists as cutted xml, skipping it!\")\n", " continue\n", " score = parse_filename(\n", " filename,\n", " None,\n", " expand_repeats=cfg.expand_repeats,\n", " export_dfs_to = None,\n", " remove_unpitched_objects=cfg.remove_unpitched_objects,\n", " )\n", " data[C.DATA_SCORE] = score\n", " data[C.DATA_FILE] = filename\n", " \n", " cut_by_measures_by_measure(cfg, data)\n", " data_path_cutted.mkdir(exist_ok=True)\n", " try: \n", " save_xml(data, new_filename)\n", " except Exception as e:\n", " perr(f'There was an error saving score {filename} to xml: {e}. Skipping it!')\n", " continue\n", " try: \n", " save_to_midi(new_filename)\n", " except Exception as e:\n", " perr(f'There was an error saving score {filename} to midi: {e}. Skipping it!')\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--- In this part, we are free to use jSymbolic GUI to extract features from our recent created midi files. Afterwards, just run the following cell in order to join jSymbolic data to musif's extraction---\n", "\n", "Download: https://sourceforge.net/projects/jmir/files/jSymbolic/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Merging musif data with jSymbolic extracted csv\n", "Now, if we want to merge jSymbolic data with our extracted musif df:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "PermissionError", "evalue": "[Errno 13] Permission denied: '.'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mPermissionError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[2], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m 3\u001b[0m path_to_musif_df \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m----> 4\u001b[0m df_musif \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_to_musif_df\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlow_memory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[0;32m 6\u001b[0m path_to_jsymbollic_extracted_csv \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mextracted_feature_values.csv\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 7\u001b[0m df_jSymbolic \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(path_to_jsymbollic_extracted_csv, low_memory\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", "File \u001b[1;32mc:\\Anaconda3\\envs\\musicai\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:948\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 935\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 936\u001b[0m dialect,\n\u001b[0;32m 937\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 944\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[0;32m 945\u001b[0m )\n\u001b[0;32m 946\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 948\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Anaconda3\\envs\\musicai\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:611\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 608\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m 610\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 611\u001b[0m parser \u001b[38;5;241m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m 613\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m 614\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", "File \u001b[1;32mc:\\Anaconda3\\envs\\musicai\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1448\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1445\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 1447\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1448\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Anaconda3\\envs\\musicai\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1705\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1703\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m 1704\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1705\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1706\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1707\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1708\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1709\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1710\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1711\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1712\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1713\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1714\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1715\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1716\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", "File \u001b[1;32mc:\\Anaconda3\\envs\\musicai\\lib\\site-packages\\pandas\\io\\common.py:863\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 858\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 859\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 860\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 861\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m 862\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 863\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 864\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 865\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 866\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 867\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 868\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 869\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 870\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 871\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m 872\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", "\u001b[1;31mPermissionError\u001b[0m: [Errno 13] Permission denied: '.'" ] } ], "source": [ "import pandas as pd\n", "\n", "path_to_musif_df = '.'\n", "df_musif = pd.read_csv(path_to_musif_df, low_memory=False)\n", "\n", "path_to_jsymbollic_extracted_csv = 'extracted_feature_values.csv'\n", "df_jSymbolic = pd.read_csv(path_to_jsymbollic_extracted_csv, low_memory=False)\n", "df_jSymbolic.rename(columns={'Unnamed: 0': 'FileName'})\n", "# -- here you must process the FileName column so both values in musif's df and in j_Symbolic match\n", "df_jSymbolic.columns = ['js_' + i for i in df_jSymbolic.columns] \n", "df_jSymbolic.rename(columns={'js_Unnamed: 0': 'FileName'}, inplace=True)\n", "df_jSymbolic['FileName'] = [i.replace('/Users/carlosvaquero/Downloads/midi_partial/', '').replace('.mid', '.xml') for i in df_jSymbolic['FileName']]\n", "\n", "# \n", "\n", "df_total = pd.merge(df_musif, df_jSymbolic, on='FileName')\n", "\n", "df_total.to_csv('total.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "musicai", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 2 }