{
 "metadata": {
  "name": "",
  "signature": "sha256:d64e2eb2383f6ac9c46fd19bb6123b3f7b2884f0507f8690f838b91a379ce9f6"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "This notebook reads a file of notes (here, generated by oscar2.py), builds an n-gram (trigram) model from them, and samples new notes from that model."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import Counter, defaultdict\n",
      "from itertools import izip_longest\n",
      "from sklearn.cluster import KMeans\n",
      "from sklearn.preprocessing import normalize\n",
      "from itertools import groupby\n",
      "import pandas as pd\n",
      "import copy\n",
      "import numpy as np\n",
      "import sys\n",
      "sys.path.append('C:/Python27/Lib/site-packages')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Import and order the original notes. The first two lines of the file are\n",
      "# header lines (parsed separately below), so the CSV parser skips them.\n",
      "oscar2 = pd.read_csv('oscar2notes.txt', skiprows=2).sort(\"Offset\")\n",
      "# Number the rows 1..N before filtering, so kept rows retain their position.\n",
      "oscar2.index = xrange(1, len(oscar2) + 1)\n",
      "# threshold >= octave 4 for melodies\n",
      "oscar2 = oscar2[oscar2[\"Octave\"] >= 4]\n",
      "\n",
      "# Read the two header lines that read_csv skipped: a float (metmark) and a\n",
      "# \"num / den\" pair (tsig_num, tsig_den; both kept as strings).\n",
      "with open('oscar2notes.txt', 'rb') as f:\n",
      "    metmark = float(f.readline())\n",
      "    tsig_num, tsig_den = f.readline().replace(' /', '').split()\n",
      "\n",
      "print \"Metrics:\"\n",
      "# len is shorter if octave cutoff, also if [:200] in .read_csv\n",
      "print metmark, tsig_num, tsig_den, len(oscar2)\n",
      "oscar2.head(20)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Metrics:\n",
        "176.0 4 4 1078\n"
       ]
      },
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>Note/Rest</th>\n",
        "      <th>Octave</th>\n",
        "      <th>Len</th>\n",
        "      <th>Offset</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td>  D</td>\n",
        "      <td> 5</td>\n",
        "      <td> 0.750000</td>\n",
        "      <td> 12.666667</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td>  E</td>\n",
        "      <td> 4</td>\n",
        "      <td> 0.666667</td>\n",
        "      <td> 14.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td> C#</td>\n",
        "      <td> 5</td>\n",
        "      <td> 0.875000</td>\n",
        "      <td> 14.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td>  A</td>\n",
        "      <td> 5</td>\n",
        "      <td> 0.250000</td>\n",
        "      <td> 15.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td>  F</td>\n",
        "      <td> 4</td>\n",
        "      <td> 3.125000</td>\n",
        "      <td> 16.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td>  D</td>\n",
        "      <td> 5</td>\n",
        "      <td> 0.250000</td>\n",
        "      <td> 16.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td>  A</td>\n",
        "      <td> 4</td>\n",
        "      <td> 3.125000</td>\n",
        "      <td> 16.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td>  F</td>\n",
        "      <td> 5</td>\n",
        "      <td> 1.333333</td>\n",
        "      <td> 16.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>  D</td>\n",
        "      <td> 5</td>\n",
        "      <td> 3.000000</td>\n",
        "      <td> 16.375000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
        "      <td>  F</td>\n",
        "      <td> 5</td>\n",
        "      <td> 1.750000</td>\n",
        "      <td> 17.625000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>12</th>\n",
        "      <td>  G</td>\n",
        "      <td> 4</td>\n",
        "      <td> 0.666667</td>\n",
        "      <td> 20.625000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>13</th>\n",
        "      <td> B-</td>\n",
        "      <td> 4</td>\n",
        "      <td> 0.250000</td>\n",
        "      <td> 20.666667</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>14</th>\n",
        "      <td> E-</td>\n",
        "      <td> 4</td>\n",
        "      <td> 0.625000</td>\n",
        "      <td> 22.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>15</th>\n",
        "      <td>  A</td>\n",
        "      <td> 4</td>\n",
        "      <td> 0.125000</td>\n",
        "      <td> 22.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>17</th>\n",
        "      <td>  G</td>\n",
        "      <td> 4</td>\n",
        "      <td> 0.375000</td>\n",
        "      <td> 22.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>18</th>\n",
        "      <td> B-</td>\n",
        "      <td> 5</td>\n",
        "      <td> 0.875000</td>\n",
        "      <td> 23.875000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>19</th>\n",
        "      <td>  F</td>\n",
        "      <td> 4</td>\n",
        "      <td> 1.250000</td>\n",
        "      <td> 23.875000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>20</th>\n",
        "      <td> B-</td>\n",
        "      <td> 5</td>\n",
        "      <td> 1.250000</td>\n",
        "      <td> 25.500000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>21</th>\n",
        "      <td>  D</td>\n",
        "      <td> 6</td>\n",
        "      <td> 0.750000</td>\n",
        "      <td> 28.625000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>22</th>\n",
        "      <td>  B</td>\n",
        "      <td> 5</td>\n",
        "      <td> 1.375000</td>\n",
        "      <td> 28.625000</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>20 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 10,
       "text": [
        "   Note/Rest  Octave       Len     Offset\n",
        "2          D       5  0.750000  12.666667\n",
        "3          E       4  0.666667  14.000000\n",
        "4         C#       5  0.875000  14.000000\n",
        "5          A       5  0.250000  15.000000\n",
        "6          F       4  3.125000  16.000000\n",
        "7          D       5  0.250000  16.000000\n",
        "8          A       4  3.125000  16.000000\n",
        "9          F       5  1.333333  16.000000\n",
        "10         D       5  3.000000  16.375000\n",
        "11         F       5  1.750000  17.625000\n",
        "12         G       4  0.666667  20.625000\n",
        "13        B-       4  0.250000  20.666667\n",
        "14        E-       4  0.625000  22.000000\n",
        "15         A       4  0.125000  22.000000\n",
        "17         G       4  0.375000  22.000000\n",
        "18        B-       5  0.875000  23.875000\n",
        "19         F       4  1.250000  23.875000\n",
        "20        B-       5  1.250000  25.500000\n",
        "21         D       6  0.750000  28.625000\n",
        "22         B       5  1.375000  28.625000\n",
        "\n",
        "[20 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\"\"\" 1. Get generated notes based on the trigram model. \"\"\"\n",
      "\n",
      "# Iterate over a list in chunks of size n. Return tuples (for dict).\n",
      "def chunks(iterable, n):\n",
      "    for ix, item in enumerate(iterable):\n",
      "        if ix == len(iterable) - (n-1): return\n",
      "        yield tuple(iterable[ix:ix+n])\n",
      "\n",
      "# Build the conditional probability tables.\n",
      "def condProbTables(ngramfreqs, nngramfreqs):\n",
      "    nprobs = defaultdict(int)\n",
      "    prevnngramnexts = defaultdict(list)\n",
      "    for ngram, freq in ngramfreqs.items():\n",
      "        prevnngram = ngram[:-1]\n",
      "        currchar = ngram[-1]\n",
      "        nprobs[(currchar, prevnngram)] = float(ngramfreqs[ngram]) / nngramfreqs[prevnngram]\n",
      "        if prevnngram not in prevnngramnexts.keys():\n",
      "            prevnngramnexts[prevnngram].extend([(currchar, (float(ngramfreqs[ngram]) / nngramfreqs[prevnngram]))])\n",
      "            continue\n",
      "        prevnngramnexts[prevnngram].extend([(currchar, (float(ngramfreqs[ngram]) / nngramfreqs[prevnngram]))])\n",
      "    return nprobs, prevnngramnexts\n",
      "    \n",
      "# Yield the next note for a given n-gram model.\n",
      "# 'unitsize' is n, i.e. 3 for using trigrams.\n",
      "# args are the previous notes used to generate the next one.\n",
      "# Assumes # of args == same # for lookup in prevnnnexts\n",
      "def yieldNext(prevnnexts, *args):\n",
      "    lookup = tuple([a for a in args])\n",
      "    nexts = np.array(prevnnexts[lookup])\n",
      "    nextnotes = nexts[:,0]\n",
      "    probabilities = nexts[:,1]\n",
      "\n",
      "    # remove possibility of >= 3 notes in row for trigram model\n",
      "    if len(set(args)) == 1: # if prev notes = all same\n",
      "        ixToDel = []\n",
      "        for ix, (note, prob) in enumerate(zip(nextnotes, probabilities)):\n",
      "            if note in args:\n",
      "                ixToDel.append(ix)\n",
      "        nextnotes = np.delete(nextnotes, ixToDel)\n",
      "        probabilities = np.delete(probabilities, ixToDel)\n",
      "                \n",
      "    # Also to consider: remove notes in nextnotes if jump from octave 4 to 6 etc.\n",
      "    totalprob = 0; # assert is normalized\n",
      "    for p in probabilities: totalprob += float(p)\n",
      "    if totalprob != 1.0: probabilities = normList(probabilities)\n",
      "    return np.random.choice(nextnotes, p=probabilities)\n",
      "\n",
      "def genTrigrams(prevbigramnexts, k=100):\n",
      "    \"\"\"Yield k notes sampled one after another from the trigram model.\n",
      "\n",
      "    Generation starts from two \"start\" markers; every sampled note is\n",
      "    shifted into the two-note context used for the next draw. Change the\n",
      "    number of generated notes through k (default 100).\n",
      "    \"\"\"\n",
      "    context = (\"start\", \"start\")\n",
      "    for _ in xrange(k):\n",
      "        sampled = yieldNext(prevbigramnexts, *context)\n",
      "        # Slide the window: drop the oldest note, append the new one.\n",
      "        context = (context[1], sampled)\n",
      "        yield sampled\n",
      "        \n",
      "\"\"\" 2. Generate the offsets using simple frequency probabilities. \"\"\"\n",
      "\n",
      "# Iterate over iterable in groups of n.\n",
      "def grouper(n, iterable, fillvalue=None):\n",
      "    for ix, i in enumerate(iterable):\n",
      "        if ix == len(iterable) - 1:\n",
      "            break\n",
      "        yield (iterable[ix], iterable[ix+1])\n",
      "        \n",
      "# Normalize an iterable.\n",
      "def normList(L, normalizeTo=1):\n",
      "    vMax = 0\n",
      "    for item in L:\n",
      "        vMax += float(item)\n",
      "    return [ float(x)/(vMax*1.0)*normalizeTo for x in L]\n",
      "\n",
      "# Round to nearest nth of a unit.\n",
      "def my_round(x, n=4):\n",
      "    return round(x*n)/n\n",
      "\n",
      "\"\"\" 3. Pruning. \n",
      "    For one, go through and make sure you don't get random tiny clusters \n",
      "    of notes + awkward octave jumps. If you have time later, do this dynamically \n",
      "    in generating the n-gram models above. \n",
      "    Assume Oscar doesn't play any repeated notes at his\n",
      "    ridiculously fast tempo (since consequence of n-gram model anyway). \"\"\"\n",
      "        \n",
      "# iterate through, remove if awkward jumps i.e. c6 b4 g4 e4 f6\n",
      "def findJumps(generated):\n",
      "    ixJumps = []\n",
      "    for ix, note in enumerate(gennotes):\n",
      "        if ix == len(gennotes) - 2:\n",
      "            break\n",
      "        currOct = note[-1]\n",
      "        nextOct = gennotes[ix+1][-1]\n",
      "        if np.abs(float(currOct) - float(nextOct)) > 1:\n",
      "            ixJumps.append(ix)\n",
      "    return ixJumps\n",
      "\n",
      "def smoothen(original):\n",
      "    \"\"\"Return a copy of the notes with jumps of more than one octave reduced.\n",
      "\n",
      "    For every jump index reported by findJumps, the higher of the two\n",
      "    notes is lowered so that it sits exactly one octave above the other.\n",
      "    For example, c4 g4 c6 becomes c4 g4 c5. The input list is not modified.\n",
      "    Doesn't change original style too much, but solves n-gram problem\n",
      "    noted in past literature.\n",
      "    \"\"\"\n",
      "    gennotes = copy.deepcopy(original)\n",
      "    for i in findJumps(gennotes):\n",
      "        if i + 1 >= len(gennotes):\n",
      "            continue\n",
      "        prevnote = gennotes[i]\n",
      "        nextnote = gennotes[i+1]\n",
      "        prevoct = int(prevnote[-1])\n",
      "        nextoct = int(nextnote[-1])\n",
      "        # An earlier correction may already have closed this gap; re-check\n",
      "        # so the pair is not lowered a second time.\n",
      "        if abs(prevoct - nextoct) <= 1:\n",
      "            continue\n",
      "        # Lower the higher note to one octave above its neighbour, so gaps\n",
      "        # of three or more octaves are closed as well.\n",
      "        if prevoct > nextoct:\n",
      "            gennotes[i] = \"%s%s\" % (prevnote[:-1], nextoct + 1)\n",
      "        else:\n",
      "            gennotes[i+1] = \"%s%s\" % (nextnote[:-1], prevoct + 1)\n",
      "    return gennotes\n",
      "\n",
      "# Given the generated notes, removes duplicates\n",
      "# For example, c4 g5 g5 g5 e5 -> c4 g5 e5.\n",
      "def rmDuplicates(original):\n",
      "    gennotes = copy.deepcopy(original)\n",
      "    i = 0\n",
      "    while i < len(gennotes) - 1:\n",
      "        if gennotes[i] == gennotes[i+1]:\n",
      "            del gennotes[i]\n",
      "        else:\n",
      "            i += 1\n",
      "    return gennotes\n",
      "\n",
      "# Given the generated notes, remove isolated notes w/jumps too far apart.\n",
      "# For example, c6 g4 c6 --> c6 c6. only if adjacent = same octave\n",
      "# since say c6 g5 c4 could make good sense. (Run rmDup. again after this)\n",
      "def rmSingles(original):\n",
      "    gennotes = copy.deepcopy(original)\n",
      "    ixToDel = []\n",
      "    i = 0\n",
      "    while i < len(gennotes) - 1:\n",
      "        if i == 0: i+=1; continue\n",
      "        prevnote = gennotes[i-1]\n",
      "        currnote = gennotes[i]\n",
      "        nextnote = gennotes[i+1]\n",
      "        if (prevnote[-1] == nextnote[-1] and np.abs(float(prevnote[-1]) - float(currnote[-1])) > 0):\n",
      "            gennotes.pop(i)\n",
      "        i+=1\n",
      "    return gennotes"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\"\"\" The script to generate the notes.\"\"\"\n",
      "\n",
      "# Turn every row of the table into a note name (note + octave, e.g. \"D5\"),\n",
      "# preceded by three \"start\" markers. iterrows() yields (index, row) pairs,\n",
      "# so only the row part is used.\n",
      "possiblenotes = [\"start\"] * 3\n",
      "possiblenotes.extend(\"%s%s\" % (row[\"Note/Rest\"], row[\"Octave\"]) for _, row in oscar2.iterrows())\n",
      "\n",
      "# Count how often each bigram and each trigram occurs in the sequence.\n",
      "bigramfreqs = defaultdict(int)\n",
      "for bigram in chunks(possiblenotes, 2):\n",
      "    bigramfreqs[bigram] += 1\n",
      "trigramfreqs = defaultdict(int)\n",
      "for trigram in chunks(possiblenotes, 3):\n",
      "    trigramfreqs[trigram] += 1\n",
      "\n",
      "# encode trigram probabilities\n",
      "triprobs, prevbigramnexts = condProbTables(trigramfreqs, bigramfreqs)\n",
      "\n",
      "\"\"\" The offsets. \"\"\"\n",
      "\n",
      "# Tally the gaps between consecutive note onsets, rounded by my_round\n",
      "# (to the nearest quarter with its default n=4).\n",
      "offsets = defaultdict(int)\n",
      "offset_times = [float(i) for i in oscar2[\"Offset\"]]\n",
      "for earlier, later in grouper(2, offset_times):\n",
      "    diff = my_round(later - earlier)\n",
      "    if diff > 4: continue # can't have gaps > 4\n",
      "    offsets[diff] += 1\n",
      "\n",
      "# Keep a gap only if it is positive and is not a rare short gap (seen fewer\n",
      "# than 5 times and shorter than 2). Filtering into new lists replaces\n",
      "# deleting from offset_poss while looping over it, which skips the entry\n",
      "# that follows each deletion.\n",
      "offset_kept = [(gap, count) for gap, count in offsets.items()\n",
      "               if gap > 0 and not (count < 5 and gap < 2)]\n",
      "# possible offsets and their probabilities; separate lists for np.random.choice()\n",
      "offset_poss = [gap for gap, count in offset_kept]\n",
      "offset_probs = normList([count for gap, count in offset_kept])\n",
      "\n",
      "# Generate the notes and one offset per note. An IndexError from the sampler\n",
      "# (a context with no usable continuation) discards the run and starts over;\n",
      "# the loop also repeats until exactly numberofngrams notes were produced.\n",
      "numberofngrams = 500 # fiddle with this\n",
      "maxAttempts = 1000   # give up instead of retrying forever\n",
      "numberGenerated = 0\n",
      "attempts = 0\n",
      "while numberGenerated != numberofngrams: # remove while if decide to rm. duplicates\n",
      "    attempts += 1\n",
      "    if attempts > maxAttempts:\n",
      "        raise RuntimeError(\"could not generate %s notes in %s attempts\" % (numberofngrams, maxAttempts))\n",
      "    try:\n",
      "        gennotes = [note for note in genTrigrams(prevbigramnexts, numberofngrams) if note != \"start\"]\n",
      "    except IndexError:\n",
      "        continue # dead end in the model: retry from scratch\n",
      "    genoffsets = [np.random.choice(offset_poss, p=offset_probs) for _ in xrange(len(gennotes))]\n",
      "    numberGenerated = len(gennotes)\n",
      "\n",
      "# Prune: first reduce large octave jumps, then collapse repeated notes.\n",
      "# Experiment with which steps to use, to see how close is to Oscar's style.\n",
      "gennotes = rmDuplicates(smoothen(gennotes))\n",
      "# gennotes = rmSingles(gennotes)\n",
      "# gennotes = rmDuplicates(gennotes)\n",
      "\n",
      "# Report how many notes are left after pruning.\n",
      "print \"# of notes generated after pruning: %s\" % len(gennotes)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "# of notes generated after pruning: 489\n"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Save one \"note,offset\" line per generated note; zip() stops at the\n",
      "# shorter of the two lists.\n",
      "# you'll want to write directly out later instead of writing out\n",
      "# then reading in again\n",
      "with open(\"oscar2trigrams.txt\", 'wb') as f:\n",
      "    f.writelines(\"%s,%s\\n\" % pair for pair in zip(gennotes, genoffsets))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 13
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": []
    }
   ],
   "metadata": {}
  }
 ]
}