# XML cutter and midi formatter 
This script is prepared to parse a folder full of xml files, cut them to a certain length , *if desired* (according to a specific measure criteria) and output then a midi file with thiss new length.

Afterwards, we are able to process these new midi files by using jSymbolic GUI if we want to include these features in our analyses. This notebook allso provides code for that purpose.

To install `musif`:
1. [Download](https://raw.githubusercontent.com/DIDONEproject/musif/main/docs/source/Cutter_and_midi_extractor.ipynb) this notebook.
2. Start `jupyter` in your Anaconda environment.
3. Open this script.
4. Run the following cell by clicking on it and pressing Ctrl+Enter.

https://musescore.org/es/download


In [None]:
%pip install musif

In [None]:
import os
import subprocess
import sys
from math import floor
from os import path
from pathlib import Path

from music21.stream.base import Measure, Score
from musif.common._utils import read_dicts_from_csv

import musif.extract.constants as C
from musif.extract.extract import parse_filename
from musif.logs import perr, pinfo, pwarn

sys.path.append(os.path.abspath('.'))
from feature_extraction.custom_conf import CustomConf
from musif.config import ExtractConfiguration

In [None]:
import glob


class CustomConf(ExtractConfiguration):
 def __init__(self, *args, **kwargs):
 super().__init__(*args, **kwargs)
 self._load_metadata()

 def _load_metadata(self) -> None:
 self.scores_metadata = {
 path.basename(file): read_dicts_from_csv(file)
 for file in glob(path.join(self.metadata_dir, "*.csv")) # type: ignore
 }
 if not self.scores_metadata:
 print(
 "\nMetadata could not be loaded properly!! Check metadata path in config file.\n"
 )

Define specific functions for our script

In [None]:
def cut_by_measures_by_measure(cfg, data):
 score: Score = data[C.DATA_SCORE]
 last_measure = 1000000
 for metadata in cfg.scores_metadata[last_measure]:
 if metadata["FileName"] == data["FileName"]:
 last_measure = floor(float(metadata.get(cfg.end_of_theme_a, last_measure)))
 if last_measure == 0:
 name = data['file'].name
 pwarn(f'Last measure for {name} fil was found to be 0! Remember to update metadata before extraction ;) Setting last measure to the end of the score.\n')
 last_measure = 1000000
 break

 remove_everything_after_measure(score, last_measure)

def remove_everything_after_measure(score, last_measure):
 for part in score.parts:
 read_measures = 0
 elements_to_remove = []
 for measure in part.getElementsByClass(Measure):
 read_measures += 1
 if read_measures > last_measure:
 elements_to_remove.append(measure)
 part.remove(targetOrList=elements_to_remove)

def save_xml(data, new_filename):
 new_filename = str(new_filename) + '.xml'
 data[C.DATA_SCORE].write('musicxml', fp=f'{new_filename}')
 
def save_to_midi(filename):
 filename = str(filename)
 new_filename = filename + '.mid'
 if path.exists(new_filename):
 pinfo(f"{filename} already exists as MIDI, skipping it!")
 return
 cmd = ["mscore", "-fo", new_filename, filename + '.xml']
 pinfo(f"Converting {filename} to MIDI")
 try:
 subprocess.run(
 cmd,
 stdout=subprocess.DEVNULL,
 timeout=120,
 )
 except subprocess.TimeoutExpired:
 pwarn(
 f"Continuing because time expired for file {filename}! Try running:\n"
 + "".join(cmd)
 ) 

In [None]:
from musif.config import ExtractConfiguration

custom_config = "config.yml"

cfg = CustomConf(
 None,
 metadata_dir = "your/metadata/dir"
 expand_repeats = False,
 remove_unpitched_objects = True)
data_path = 'data/xml/'
data_path_cutted = Path('data/xml/cutted_themeA/')


In [None]:
for filename in sorted(Path(data_path).glob(f"*.xml")):
 data = {}
 new_filename = data_path_cutted / Path(filename.stem + '_cutted')
 if path.exists(str(new_filename) + '.xml'):
 pinfo(f"{filename} already exists as cutted xml, skipping it!")
 continue
 score = parse_filename(
 filename,
 None,
 expand_repeats=cfg.expand_repeats,
 export_dfs_to = None,
 remove_unpitched_objects=cfg.remove_unpitched_objects,
 )
 data[C.DATA_SCORE] = score
 data[C.DATA_FILE] = filename
 
 cut_by_measures_by_measure(cfg, data)
 data_path_cutted.mkdir(exist_ok=True)
 try: 
 save_xml(data, new_filename)
 except Exception as e:
 perr(f'There was an error saving score {filename} to xml: {e}. Skipping it!')
 continue
 try: 
 save_to_midi(new_filename)
 except Exception as e:
 perr(f'There was an error saving score {filename} to midi: {e}. Skipping it!')



--- In this part, we are free to use jSymbolic GUI to extract features from our recent created midi files. Afterwards, just run the following cell in order to join jSymbolic data to musif's extraction---

Download: https://sourceforge.net/projects/jmir/files/jSymbolic/

# Merging musif data with jSymbolic extracted csv
Now, if we want to merge jSymbolic data with our extracted musif df:

In [2]:
import pandas as pd

path_to_musif_df = '.'
df_musif = pd.read_csv(path_to_musif_df, low_memory=False)

path_to_jsymbollic_extracted_csv = 'extracted_feature_values.csv'
df_jSymbolic = pd.read_csv(path_to_jsymbollic_extracted_csv, low_memory=False)
df_jSymbolic.rename(columns={'Unnamed: 0': 'FileName'})
# -- here you must process the FileName column so both values in musif's df and in j_Symbolic match
df_jSymbolic.columns = ['js_' + i for i in df_jSymbolic.columns] 
df_jSymbolic.rename(columns={'js_Unnamed: 0': 'FileName'}, inplace=True)
df_jSymbolic['FileName'] = [i.replace('/Users/carlosvaquero/Downloads/midi_partial/', '').replace('.mid', '.xml') for i in df_jSymbolic['FileName']]

# 

df_total = pd.merge(df_musif, df_jSymbolic, on='FileName')

df_total.to_csv('total.csv', index=False)

PermissionError: [Errno 13] Permission denied: '.'