{
 "metadata": {
  "name": "",
  "signature": "sha256:702f4e3daa7d61ea64794d92501dcc001d676c217353e61684b61350bed8a671"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Previously, your non-iPython script oscar.py gave you a text file oscar2chords.txt, in the form:\n",
      "\n",
      "Chord{....}..., c minor 7, 0.125, 0.800\n",
      "\n",
      "In this notebook, you extract this information and build a possible \"chord bank\" of possible chords\n",
      "and what triggers there are (e.g. certain note sequences) to play them. This is necessary because\n",
      "you need to get a unique collection of chords Oscar Peterson played, and your text file oscar2chords.txt\n",
      "is insufficient because it lists all chords that are played rather than only the unique ones.\n",
      "\n",
      "Output: a chord bank as a .txt file that you can use with the clustering algorithms and playback in \n",
      "the other notesbooks.\n",
      "\n",
      "Dependencies:\n",
      "\n",
      "--2. N-Gram"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import Counter, defaultdict\n",
      "from sklearn.cluster import KMeans\n",
      "from mingus.midi import fluidsynth\n",
      "from mingus.containers import NoteContainer\n",
      "from mingus.containers.Bar import Bar\n",
      "import mingus.core.value as value\n",
      "import pandas as pd\n",
      "import numpy as np\n",
      "import sys, re, itertools, random\n",
      "sys.path.append('C:/Python27/Lib/site-packages')\n",
      "fluidsynth.init('/usr/share/sounds/sf2/FluidR3_GM.sf2',\"alsa\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 1,
       "text": [
        "True"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Import the chord data.\n",
      "allchords = pd.read_csv('oscar2chords.txt', skiprows=2)[:].sort(\"Offset\")\n",
      "allchords.index = xrange(1, len(allchords) + 1)\n",
      "with open('oscar2chords.txt', 'rb') as f:\n",
      "    metmark = float(f.readline())\n",
      "    tsig_num, tsig_den = [i for i in f.readline().replace(' /', '').split()]\n",
      "    \n",
      "print \"Metronome, Timesig Numerator, Timesig Denominator, # chords played\"\n",
      "print metmark, tsig_num, tsig_den, len(allchords)\n",
      "allchords.sort(columns=\"Offset\", ascending=True)[:10]\n",
      "allchords.head()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Metronome, Timesig Numerator, Timesig Denominator, # chords played\n",
        "176.0 4 4 297\n"
       ]
      },
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>FullName</th>\n",
        "      <th>CommonName</th>\n",
        "      <th>Len</th>\n",
        "      <th>Offset</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Chord {D in octave 5 | C in octave 4 | E in oc...</td>\n",
        "      <td>   A6-perfect-fourth minor tetrachord</td>\n",
        "      <td> 1.125000</td>\n",
        "      <td> 8.000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> Chord {A in octave 3 | G in octave 3 | E in oc...</td>\n",
        "      <td> A3-incomplete dominant-seventh chord</td>\n",
        "      <td> 1.250000</td>\n",
        "      <td> 8.000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> Chord {E in octave 6 | E in octave 4 | D in oc...</td>\n",
        "      <td>                  D6-quartal trichord</td>\n",
        "      <td> 1.375000</td>\n",
        "      <td> 9.625</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> Chord {C in octave 4 | A in octave 5} Dotted Q...</td>\n",
        "      <td>                  A5-interval class 3</td>\n",
        "      <td> 1.500000</td>\n",
        "      <td> 9.625</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> Chord {G in octave 3 | A in octave 3} Quarter ...</td>\n",
        "      <td>                  A3-interval class 2</td>\n",
        "      <td> 1.666667</td>\n",
        "      <td> 9.625</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "                                            FullName  \\\n",
        "1  Chord {D in octave 5 | C in octave 4 | E in oc...   \n",
        "2  Chord {A in octave 3 | G in octave 3 | E in oc...   \n",
        "3  Chord {E in octave 6 | E in octave 4 | D in oc...   \n",
        "4  Chord {C in octave 4 | A in octave 5} Dotted Q...   \n",
        "5  Chord {G in octave 3 | A in octave 3} Quarter ...   \n",
        "\n",
        "                             CommonName       Len  Offset  \n",
        "1    A6-perfect-fourth minor tetrachord  1.125000   8.000  \n",
        "2  A3-incomplete dominant-seventh chord  1.250000   8.000  \n",
        "3                   D6-quartal trichord  1.375000   9.625  \n",
        "4                   A5-interval class 3  1.500000   9.625  \n",
        "5                   A3-interval class 2  1.666667   9.625  \n",
        "\n",
        "[5 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Convert music21 note to mingus note.\n",
      "# This version (different from that in 3. Play Notes)\n",
      "# doesn't return a Note object: returns a string.\n",
      "def mingifytext(note):\n",
      "    accidental = re.compile(\"[A-Z](-|#)[0-9]\")\n",
      "    if accidental.match(note):\n",
      "        if '-' not in note: note = \"%s%s-%s\" % (note[0], note[1], note[2])\n",
      "        else: note = note.replace('-', 'b-')\n",
      "    else: note = \"%s-%s\" % (note[0], note[1])\n",
      "    return note\n",
      "\n",
      "# Given a MUSIC21 note, such as C5 or D#7, convert it\n",
      "# into a note on the keyboard between 0 and 87 inclusive.\n",
      "# Don't convert it for mingus; try to use music21 note style\n",
      "# as much as possible for all this stuff.\n",
      "def quantify(note):\n",
      "    notevals = {\n",
      "        'C' : 0,\n",
      "        'D' : 2,\n",
      "        'E' : 4,\n",
      "        'F' : 5,\n",
      "        'G' : 7,\n",
      "        'A' : 9,\n",
      "        'B' : 11\n",
      "    }\n",
      "    quantized = 0\n",
      "    octave = int(note[-1]) - 1\n",
      "    for i in note[:-1]:\n",
      "        if i in notevals: quantized += notevals[i]\n",
      "        if i == '-': quantized -= 1\n",
      "        if i == '#': quantized += 1\n",
      "    quantized += 12 * octave\n",
      "    return quantized\n",
      "\n",
      "# Extract notes in chords.\n",
      "# Shorter single-note chords: lowest prob of being played\n",
      "def getChords(allchords, mingify=True):\n",
      "    chords_poss = []\n",
      "    for chordname in allchords['FullName']:\n",
      "        notenames = re.findall(\"[CDEFGAB]+[-]*[sharp|flat]*[in octave]*[1-9]\", chordname)\n",
      "        for ix in xrange(len(notenames)):\n",
      "            notenames[ix] = notenames[ix].replace(\" in octave \", '').replace(\"-sharp\",\"#\").replace(\"-flat\",\"-\")\n",
      "        if mingify==True:\n",
      "            notenames = [mingifytext(note) for note in notenames]\n",
      "        else:\n",
      "            notenames = [note for note in notenames]\n",
      "        toDel = [ix for ix in xrange(len(notenames)) if \"6\" in notenames[ix] \n",
      "                 or \"5\" in notenames[ix]] # rm chords with notes too high, e.g. oct == 6 or 5\n",
      "        notenames = [i for ix, i in enumerate(notenames) if ix not in toDel]\n",
      "        if len(notenames) > 2: # min num of notes in valid chord = 3. Can change this\n",
      "            chords_poss.append(sorted(notenames)) # important to sort, else can't find duplicates\n",
      "    result = sorted(list(chords_poss for chords_poss,_ in itertools.groupby(chords_poss)))\n",
      "    result = list(result for result,_ in itertools.groupby(result))\n",
      "    return result\n",
      "\n",
      "oscarchords = getChords(allchords)\n",
      "print len(oscarchords)\n",
      "oscarchords[:10]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "40\n"
       ]
      },
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 9,
       "text": [
        "[['A-2', 'A-3', 'E-3'],\n",
        " ['A-3', 'A-4', 'C-4'],\n",
        " ['A-3', 'Bb-3', 'D-4'],\n",
        " ['A-3', 'Bb-3', 'D-4', 'F-4'],\n",
        " ['A-3', 'C#-3', 'E-4'],\n",
        " ['A-3', 'C#-4', 'F#-4'],\n",
        " ['A-3', 'C-3', 'C-4'],\n",
        " ['A-3', 'C-3', 'E-4'],\n",
        " ['A-3', 'C-4', 'D-4'],\n",
        " ['A-3', 'C-4', 'E-4', 'G-3']]"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Write chords out into cleaned-up version of Oscar's chords\n",
      "with open(\"oscar2chords_extract.txt\", 'wb') as f:\n",
      "    for chord in oscarchords:\n",
      "        for n in chord:\n",
      "            f.write(n)\n",
      "            f.write(' ')\n",
      "        f.write('\\n')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Test audio. First populate bars.\n",
      "# Maybe this is a better way to play notes rather than one huge bar ---> same duration/each note.\n",
      "# allbars = []\n",
      "# for chord in uniquechords:\n",
      "#     b = Bar()\n",
      "#     b.set_meter((4, 4))\n",
      "#     b.place_notes(chord, 4)\n",
      "#     allbars.append(b)\n",
      "# for bar in allbars:\n",
      "#     fluidsynth.play_Bar(bar, 1, 300)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Okay, now that you have a bunch of unique musical chords, the next step is to build a predictive model that takes in a sequence of notes (assume offsets all equal), and generates chords to accompany them. For example:\n",
      "\n",
      "$Notes:\\:\\:\\:D\\:\\:\\:F\\:\\:\\:A\\:\\:\\:C\\:\\:|\\:B\\:\\:\\:G\\:\\:\\:F\\:\\:\\:E-\\:\\:\\:|\\:E\n",
      "\\\\Chords:\\:D-7\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:|\\:\\:\\:\\:\\:\\:\\:G7\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:\\:|\\:CMaj7$\n",
      "\n",
      "Doesn't have to be perfect. Also, just assume 4/4 time, for putting the notes into bars later you'll just use 4/4 time for each\n",
      "and a note for each beat, this keeps things simpler. Can still have randomly generated gaps <b>between</b> clusters of course, but you assume that the improviser just plays fast straight 8ths for each cluster or musical idea."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": true,
     "input": [
      "# Read in notes and convert into bitwise frame. But might be expensive.\n",
      "# note sequence is a list in music 21 style, (D/D-)\n",
      "# note that chordbank and notesequence should be in same format (mingus/m21)\n",
      "# Returns default dict with notes in whatever music21/mingus style chordbank is already in.\n",
      "from sklearn.cluster import Ward\n",
      "def comp(notesequence, chordbank):\n",
      "    \"\"\" Problem: how to determine whether chord \"fits\" note or not? \"\"\"\n",
      "    # Cluster notes into chunks\n",
      "    quantizednotes = np.array([quantify(note) for note in notesequence]).reshape(-1, 1)\n",
      "    wardclf = Ward()\n",
      "    wardclf.fit(quantizednotes)\n",
      "    \n",
      "    # get indices of first note in each cluster\n",
      "    firstixs = [0]\n",
      "    firstnotes = [notesequence[0]]\n",
      "    currLabel = 0\n",
      "    for ix, label in enumerate(wardclf.labels_):\n",
      "        if ix == 0: continue\n",
      "        if label != currLabel:\n",
      "            currLabel = label\n",
      "            firstixs.append(ix)\n",
      "            firstnotes.append(notesequence[ix])\n",
      "    \n",
      "    # For each start note of cluster, find random chord that also starts with that note.\n",
      "    allmatches = defaultdict()\n",
      "    for ix, note in zip(firstixs, firstnotes):\n",
      "        matching = [chord for chord in chordbank if chord[0][0] == note[0]] # null if no matches\n",
      "        if len(matching) >= 1:\n",
      "            allmatches[ix] = random.choice(matching)\n",
      "    \"\"\" Revise above chord-choosing section. Need to choose better chords -- maybe bigram or trigram model\n",
      "        with backoff? \"\"\"\n",
      "            \n",
      "    return allmatches\n",
      "    \n",
      "testnotes = ['D4','F4','A4','C5','E5','E-5','D-5','D5','C5','A4','A#4','B4','D5','F5',\n",
      "             'E5','E-5','D-5','D5','C5','A4','A#4','A-5','C5','A4','A#4','B4','D5',\n",
      "             'G5','G-5','F5','D5','C5','A4','A#4','A-5','C5','E-5','E5']\n",
      "chordmatches = comp(testnotes, oscarchords)\n",
      "chordmatches"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "defaultdict(None, {32: ['A-2', 'A-3', 'E-3'], 34: ['A-2', 'A-3', 'E-3'], 9: ['A-2', 'A-3', 'E-3'], 19: ['A-2', 'A-3', 'E-3'], 21: ['A-2', 'A-3', 'E-3'], 23: ['A-2', 'A-3', 'E-3']})"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Test doing notes with chords\n",
      "# fullbars = []\n",
      "# for ix, note in enumerate(testnotes):\n",
      "#     b = Bar()\n",
      "#     b.set_meter((4,4))\n",
      "#     if ix in chordmatches.keys():\n",
      "#         allbarnotes = chordmatches[ix]\n",
      "#         allbarnotes.append(mingifytext(note))\n",
      "#         b.place_notes(allbarnotes, 4)\n",
      "#         fullbars.append(b)\n",
      "#         continue\n",
      "#     b.place_notes(mingifytext(note), 4)\n",
      "#     fullbars.append(b)\n",
      "# for bar in fullbars:\n",
      "#     fluidsynth.play_Bar(bar, 1, 500)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    }
   ],
   "metadata": {}
  }
 ]
}