{ "metadata": { "name": "", "signature": "sha256:d64e2eb2383f6ac9c46fd19bb6123b3f7b2884f0507f8690f838b91a379ce9f6" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This takes in a file with notes (here, generated by oscar2.py) and uses the n-gram model to generate notes." ] }, { "cell_type": "code", "collapsed": false, "input": [ "from collections import Counter, defaultdict\n", "from itertools import izip_longest\n", "from sklearn.cluster import KMeans\n", "from sklearn.preprocessing import normalize\n", "from itertools import groupby\n", "import pandas as pd\n", "import copy\n", "import numpy as np\n", "import sys\n", "sys.path.append('C:/Python27/Lib/site-packages')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [
 "# Load the note table and order it by the Offset column. The first two lines of the\n",
 "# file are skipped here and parsed separately below.\n",
 "# The bare [:] keeps every row; narrow it (e.g. [:200]) to work on a subset.\n",
 "oscar2 = pd.read_csv('oscar2notes.txt', skiprows=2)[:].sort(\"Offset\")\n",
 "oscar2.index = xrange(1, len(oscar2) + 1)\n",
 "# Keep only rows in octave 4 and above (the threshold used for melodies).\n",
 "oscar2 = oscar2[oscar2.Octave >= 4]\n",
 "\n",
 "# Header line 1 is parsed as a float (metmark). Header line 2 has ' /' removed and is\n",
 "# split on whitespace into two fields -- presumably a time signature such as '4 / 4'.\n",
 "with open('oscar2notes.txt', 'rb') as f:\n",
 "    metmark = float(f.readline())\n",
 "    tsig_num, tsig_den = f.readline().replace(' /', '').split()\n",
 "\n",
 "print \"Metrics:\"\n",
 "# The row count printed last reflects the octave filter (and any row slice applied above).\n",
 "print metmark, tsig_num, tsig_den, len(oscar2)\n",
 "oscar2.head(20)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Metrics:\n", "176.0 4 4 1078\n" ] }, { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Note/RestOctaveLenOffset
2 D 5 0.750000 12.666667
3 E 4 0.666667 14.000000
4 C# 5 0.875000 14.000000
5 A 5 0.250000 15.000000
6 F 4 3.125000 16.000000
7 D 5 0.250000 16.000000
8 A 4 3.125000 16.000000
9 F 5 1.333333 16.000000
10 D 5 3.000000 16.375000
11 F 5 1.750000 17.625000
12 G 4 0.666667 20.625000
13 B- 4 0.250000 20.666667
14 E- 4 0.625000 22.000000
15 A 4 0.125000 22.000000
17 G 4 0.375000 22.000000
18 B- 5 0.875000 23.875000
19 F 4 1.250000 23.875000
20 B- 5 1.250000 25.500000
21 D 6 0.750000 28.625000
22 B 5 1.375000 28.625000
\n", "

20 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ " Note/Rest Octave Len Offset\n", "2 D 5 0.750000 12.666667\n", "3 E 4 0.666667 14.000000\n", "4 C# 5 0.875000 14.000000\n", "5 A 5 0.250000 15.000000\n", "6 F 4 3.125000 16.000000\n", "7 D 5 0.250000 16.000000\n", "8 A 4 3.125000 16.000000\n", "9 F 5 1.333333 16.000000\n", "10 D 5 3.000000 16.375000\n", "11 F 5 1.750000 17.625000\n", "12 G 4 0.666667 20.625000\n", "13 B- 4 0.250000 20.666667\n", "14 E- 4 0.625000 22.000000\n", "15 A 4 0.125000 22.000000\n", "17 G 4 0.375000 22.000000\n", "18 B- 5 0.875000 23.875000\n", "19 F 4 1.250000 23.875000\n", "20 B- 5 1.250000 25.500000\n", "21 D 6 0.750000 28.625000\n", "22 B 5 1.375000 28.625000\n", "\n", "[20 rows x 4 columns]" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [
 "\"\"\" 1. Get generated notes based on the trigram model. \"\"\"\n",
 "\n",
 "# Yield every window of n consecutive items as a tuple (tuples are hashable, so they\n",
 "# can be used as dict keys). `iterable` must support len() and slicing.\n",
 "# A sequence shorter than n yields nothing, never a truncated tuple.\n",
 "def chunks(iterable, n):\n",
 "    for ix in range(len(iterable) - n + 1):\n",
 "        yield tuple(iterable[ix:ix+n])\n",
 "\n",
 "# Build the conditional probability tables.\n",
 "#   ngramfreqs:  dict mapping each n-gram tuple to its count.\n",
 "#   nngramfreqs: dict mapping each (n-1)-gram tuple to its count.\n",
 "# Returns (nprobs, prevnngramnexts):\n",
 "#   nprobs[(last item, preceding (n-1)-gram)] = count(n-gram) / count((n-1)-gram)\n",
 "#   prevnngramnexts[preceding (n-1)-gram]     = list of (last item, that same probability)\n",
 "def condProbTables(ngramfreqs, nngramfreqs):\n",
 "    nprobs = defaultdict(int)\n",
 "    prevnngramnexts = defaultdict(list)\n",
 "    for ngram, freq in ngramfreqs.items():\n",
 "        prevnngram = ngram[:-1]\n",
 "        currchar = ngram[-1]\n",
 "        # Compute the probability once and store it in both tables.\n",
 "        prob = float(freq) / nngramfreqs[prevnngram]\n",
 "        nprobs[(currchar, prevnngram)] = prob\n",
 "        prevnngramnexts[prevnngram].append((currchar, prob))\n",
 "    return nprobs, prevnngramnexts\n",
 "\n",
 "# Yield the next note for a given n-gram model.\n",
 "# 'unitsize' is n, i.e. 
3 for using trigrams.\n",
 "# args are the previous notes (the context); their count must match the key length\n",
 "# used in prevnnexts, whose values are lists of (next note, probability) pairs.\n",
 "# When every context note is identical, that note is not offered again, which rules\n",
 "# out three of the same note in a row for the trigram model.\n",
 "# Raises IndexError when the context has no usable continuation, so callers can retry.\n",
 "def yieldNext(prevnnexts, *args):\n",
 "    lookup = tuple(args)\n",
 "    # .get() keeps a miss from inserting an empty entry into a defaultdict table.\n",
 "    # Probabilities stay floats; they are never routed through a string array.\n",
 "    candidates = [(note, float(prob)) for note, prob in prevnnexts.get(lookup, [])]\n",
 "    if len(set(args)) == 1:  # previous notes are all the same\n",
 "        candidates = [(note, prob) for note, prob in candidates if note not in args]\n",
 "    if not candidates:\n",
 "        raise IndexError(\"no continuation available after %r\" % (lookup,))\n",
 "\n",
 "    # Also to consider: remove notes in nextnotes if jump from octave 4 to 6 etc.\n",
 "    # Always renormalize: candidates may have been dropped above, and an exact\n",
 "    # floating-point comparison against 1.0 is not a reliable test.\n",
 "    total = sum(prob for note, prob in candidates)\n",
 "    nextnotes = [note for note, prob in candidates]\n",
 "    probabilities = [prob / total for note, prob in candidates]\n",
 "    return np.random.choice(nextnotes, p=probabilities)\n",
 "\n",
 "# Generate k notes from the trigram table; default is 100. Change # of trigrams here.\n",
 "# The two-note context starts as (start, start) and slides forward one note per step.\n",
 "def genTrigrams(prevbigramnexts, k=100):\n",
 "    older, newer = \"start\", \"start\"\n",
 "    for i in xrange(k):\n",
 "        generated = yieldNext(prevbigramnexts, older, newer)\n",
 "        older, newer = newer, generated\n",
 "        yield generated\n",
 "\n",
 "\"\"\" 2. Generate the offsets using simple frequency probabilities. \"\"\"\n",
 "\n",
 "# Yield each pair of adjacent items (item, following item) of a sequence.\n",
 "# n and fillvalue are accepted but not used: pairs are always produced.\n",
 "def grouper(n, iterable, fillvalue=None):\n",
 "    for ix in range(len(iterable) - 1):\n",
 "        yield (iterable[ix], iterable[ix+1])\n",
 "\n",
 "# Scale the values so that they sum to normalizeTo (1 by default); returns a list of floats.\n",
 "# An empty input gives []; a non-empty input summing to zero raises ZeroDivisionError.\n",
 "def normList(L, normalizeTo=1):\n",
 "    values = [float(x) for x in L]  # materialize once so one-shot iterables also work\n",
 "    total = sum(values)\n",
 "    return [v / total * normalizeTo for v in values]\n",
 "\n",
 "# Round to nearest nth of a unit (the default n=4 rounds to the nearest quarter).\n",
 "def my_round(x, n=4):\n",
 "    return round(x*n)/n\n",
 "\n",
 "\"\"\" 3. Pruning. 
\n", " For one, go through and make sure you don't get random tiny clusters \n", " of notes + awkward octave jumps. If you have time later, do this dynamically \n", " in generating the n-gram models above. \n", " Assume Oscar doesn't play any repeated notes at his\n", " ridiculously fast tempo (since consequence of n-gram model anyway). \"\"\"\n",
 "\n",
 "# Return every index ix at which generated[ix] and generated[ix+1] lie more than one\n",
 "# octave apart, e.g. c6 b4 g4 e4 f6 -> [0, 3]. The octave is read from the last\n",
 "# character of each note name. Every adjacent pair is checked, including the last one.\n",
 "def findJumps(generated):\n",
 "    ixJumps = []\n",
 "    for ix in range(len(generated) - 1):\n",
 "        currOct = float(generated[ix][-1])\n",
 "        nextOct = float(generated[ix+1][-1])\n",
 "        if abs(currOct - nextOct) > 1:\n",
 "            ixJumps.append(ix)\n",
 "    return ixJumps\n",
 "\n",
 "# Find jumps > 1 octave in the generated notes, and change so jump <= 1 oct.\n",
 "# The higher note of an offending pair is moved down one octave at a time until no\n",
 "# such jump is left. For example, c4 g4 c6 becomes c4 g4 c5.\n",
 "# Works on a copy; the input list is not modified.\n",
 "# Doesn't change original style too much, but solves n-gram problem noted in past literature.\n",
 "def smoothen(original):\n",
 "    gennotes = copy.deepcopy(original)\n",
 "    ixJumps = findJumps(gennotes)\n",
 "    # The first listed jump is always still live, so each pass lowers at least one\n",
 "    # note and the loop terminates.\n",
 "    while ixJumps:\n",
 "        for i in ixJumps:\n",
 "            prevnote = gennotes[i]\n",
 "            nextnote = gennotes[i+1]\n",
 "            prevoct = int(prevnote[-1])\n",
 "            nextoct = int(nextnote[-1])\n",
 "            # A fix earlier in this pass may already have closed this gap.\n",
 "            if abs(prevoct - nextoct) <= 1:\n",
 "                continue\n",
 "            if prevoct > nextoct:\n",
 "                gennotes[i] = \"%s%s\" % (prevnote[:-1], prevoct - 1)\n",
 "            else:\n",
 "                gennotes[i+1] = \"%s%s\" % (nextnote[:-1], nextoct - 1)\n",
 "        ixJumps = findJumps(gennotes)\n",
 "    return gennotes\n",
 "\n",
 "# Given the generated notes, removes duplicates\n",
 "# For example, c4 g5 g5 g5 e5 -> c4 g5 e5.\n",
 "# Only immediate repeats are collapsed. Returns a new list in a single pass.\n",
 "def rmDuplicates(original):\n",
 "    gennotes = []\n",
 "    for note in original:\n",
 "        if not gennotes or gennotes[-1] != note:\n",
 "            gennotes.append(note)\n",
 "    return gennotes\n",
 "\n",
 "# Given the generated notes, remove isolated notes w/jumps too far apart.\n",
 "# For example, c6 g4 c6 --> c6 c6. 
only if adjacent = same octave\n", "# since say c6 g5 c4 could make good sense. (Run rmDup. again after this)\n",
 "# A note is dropped when its two neighbours share an octave that differs from its own.\n",
 "# The first and last notes are never dropped. Works on a copy of the input.\n",
 "def rmSingles(original):\n",
 "    gennotes = copy.deepcopy(original)\n",
 "    i = 1\n",
 "    while i < len(gennotes) - 1:\n",
 "        prevoct = gennotes[i-1][-1]\n",
 "        curroct = gennotes[i][-1]\n",
 "        nextoct = gennotes[i+1][-1]\n",
 "        if prevoct == nextoct and float(prevoct) != float(curroct):\n",
 "            gennotes.pop(i)\n",
 "        i += 1\n",
 "    return gennotes" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [
 "\"\"\" The script to generate the notes.\"\"\"\n",
 "\n",
 "# One name per row of oscar2 (Note/Rest followed by Octave), preceded by three start\n",
 "# markers; genTrigrams seeds its context with the same marker.\n",
 "possiblenotes = [\"start\"] * 3\n",
 "for rowix, row in oscar2.iterrows():\n",
 "    possiblenotes.append(\"%s%s\" % (row[\"Note/Rest\"], row[\"Octave\"]))\n",
 "\n",
 "# Count bigrams and trigrams over the note sequence.\n",
 "bigramfreqs = defaultdict(int)\n",
 "for bigram in chunks(possiblenotes, 2):\n",
 "    bigramfreqs[bigram] += 1\n",
 "trigramfreqs = defaultdict(int)\n",
 "for trigram in chunks(possiblenotes, 3):\n",
 "    trigramfreqs[trigram] += 1\n",
 "\n",
 "# Turn the counts into conditional probabilities of a note given the two before it.\n",
 "triprobs, prevbigramnexts = condProbTables(trigramfreqs, bigramfreqs)\n",
 "\n",
 "\"\"\" The offsets. \"\"\"\n",
 "\n",
 "# Count the gaps between consecutive Offset values, rounded by my_round\n",
 "# (nearest quarter with its default n=4, not integers only).\n",
 "offsets = defaultdict(int)\n",
 "onsets = [float(onset) for onset in oscar2[\"Offset\"]]\n",
 "for earlier, later in grouper(2, onsets):\n",
 "    diff = my_round(later - earlier)\n",
 "    if diff > 4:\n",
 "        continue # can't have gaps > 4\n",
 "    offsets[diff] += 1\n",
 "\n",
 "offset_poss = [k for k in offsets] # possible offsets. 
need separate for np.random.choice()\n",
 "offset_probs = [offsets[k] for k in offsets] # raw count of each of those offsets; normalized below\n",
 "\n",
 "# Prune before normalizing: drop rare short gaps (seen fewer than 5 times and shorter\n",
 "# than 2) and non-positive gaps. Filtering (gap, count) pairs keeps offset_poss and\n",
 "# offset_probs aligned; deleting from a list while enumerating it would skip the\n",
 "# element that follows each deletion.\n",
 "offset_kept = [(gap, count) for gap, count in zip(offset_poss, offset_probs)\n",
 "               if gap > 0 and not (count < 5 and gap < 2)]\n",
 "if not offset_kept:\n",
 "    raise ValueError(\"no usable offsets left after pruning\")\n",
 "offset_poss = [gap for gap, count in offset_kept]\n",
 "offset_probs = normList([count for gap, count in offset_kept])\n",
 "\n",
 "# Generate the notes. A context with no continuation makes the generator raise, so\n",
 "# retry until a full-length sequence comes out. Every attempt is guarded (not just\n",
 "# the first) and the number of attempts is bounded.\n",
 "numberofngrams = 500 # fiddle with this\n",
 "maxAttempts = 100\n",
 "gennotes = []\n",
 "for attempt in xrange(maxAttempts):\n",
 "    try:\n",
 "        gennotes = list(note for note in genTrigrams(prevbigramnexts, numberofngrams) if note != \"start\")\n",
 "    except (IndexError, ValueError):\n",
 "        continue # dead end in the trigram table; start over\n",
 "    if len(gennotes) == numberofngrams:\n",
 "        break\n",
 "else:\n",
 "    raise RuntimeError(\"could not generate %s notes in %s attempts\" % (numberofngrams, maxAttempts))\n",
 "numberGenerated = len(gennotes)\n",
 "\n",
 "# One randomly drawn offset per generated note.\n",
 "genoffsets = list(np.random.choice(offset_poss, p=offset_probs) for i in xrange(len(gennotes)))\n",
 "\n",
 "# Prune. 
Experiment with which to use, to see how close is to Oscar's style.\n",
 "# Octave jumps are smoothed first, then immediate repeats are collapsed.\n",
 "gennotes = rmDuplicates(smoothen(gennotes))\n",
 "# Optional extra passes (disabled):\n",
 "# gennotes = rmSingles(gennotes)\n",
 "# gennotes = rmDuplicates(gennotes)\n",
 "\n",
 "# Report how many notes are left after pruning.\n",
 "print \"# of notes generated after pruning: %s\" % len(gennotes)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "# of notes generated after pruning: 489\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [
 "# Write one note,offset line per generated note. zip() stops at the shorter list, so\n",
 "# offsets beyond the pruned note count are left unused.\n",
 "# (you'll want to write directly out later instead of writing out then reading in again)\n",
 "with open(\"oscar2trigrams.txt\", 'wb') as f:\n",
 "    f.writelines(\"%s,%s\\n\" % pair for pair in zip(gennotes, genoffsets))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "markdown", "metadata": {}, "source": [] } ], "metadata": {} } ] }