{ "metadata": { "name": "", "signature": "sha256:d5792b411611cb6fb56e0c06149988ae0a490b52f4999179a588da7fadf2da0f" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import cPickle as pickle\n", "import os\n", "import re\n", "import sqlite3\n", "\n", "import nltk\n", "from nltk.stem import PorterStemmer" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "MSD_DIR = u'/q/boar/boar-p9/MillionSong/'\n", "MSD_LFM_ROOT = os.path.join(MSD_DIR, 'Lastfm')\n", "MSD_ADD = os.path.join(MSD_DIR, 'AdditionalFiles')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "tags_dbfile = os.path.join(MSD_LFM_ROOT, 'lastfm_tags.db')\n", "uniq_tag_f = os.path.join(MSD_LFM_ROOT, 'unique_tags.txt')\n", "md_dbfile = 'track_metadata.db'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# shameless steal from https://github.com/bmcfee/hypergraph_playlist/blob/master/buildTagmatrix.py\n", "def getVocab(dbc):\n", " vocab = []\n", " cur = dbc.cursor()\n", " cur.execute('''SELECT tag FROM tags''')\n", " for (term,) in cur:\n", " vocab.append(term)\n", " pass\n", " return vocab\n", "\n", "def getTrackRows(dbc):\n", " cur = dbc.cursor()\n", " tid = {}\n", " cur.execute('''SELECT tid FROM tids''')\n", " for (i, (track,)) in enumerate(cur, 1):\n", " tid[track] = i\n", " pass\n", " return tid" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "with sqlite3.connect(tags_dbfile) as dbc:\n", " vocab = getVocab(dbc)\n", " tid = getTrackRows(dbc)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "def tid_to_dir(base_dir, tid, ext='.h5'):\n", " return os.path.join(base_dir, '/'.join(tid[2:5]), tid + ext)\n", "\n", "def sanitize(tag):\n", " return re.sub(r'(\\W|_)+', '', re.sub('(&| n )', 'and', ' '.join([stemmer.stem(token) for token in nltk.word_tokenize(tag.lower())])))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "filtered_tags = (# favorate/like/love/blabla\n", " 'favorites', 'Favorite', 'Favourites', 'favourite', 'favorite songs', 'Favourite Songs', 'favorite song', \n", " 'songs i love', 'lovedbybeyondwithin', 'Love it', 'love at first listen', 'fav', 'my favorite', 'top 40', \n", " 'songs I absolutely love', 'favs', 'My Favorites', 'Favorite Artists', 'All time favourites', \n", " 'personal favourites', 'favouritestreamable', 'favorite tracks', 'Favorite Bands', 'like it', \n", " 'I love this song', 'rex ferric faves', 'love to death', 'my gang 09', 'My Favourites', \n", " 'BeatbabeBop selection', 'I Like It', 'newbest', 'top', 'IIIIIIIIII AMAZING TRACK :D IIIIIIIIII', \n", " 'best songs of the 80s', 'LOVE LOVE LOVE', 'i love it', 'most loved',\n", " 'favorite by this group', 'amayzes loved', 'DJPMan-loved-tracks', 'best of 2008', 'loved', \n", " 'Makes Me Smile', '77davez-all-tracks', 'My pop music', 'best songs ever', 'favorite by this singer', \n", " 'I like', 'my music', 'Soundtrack Of My Life', 'UK top 40', 'Like', \n", " 'malloy2000 playlist - top songs - classical to metal', 'loved tracks',\n", " 'top artists', 'all time favorites', 'best songs of the 00s', 'favourite tracks', 'Solomusika-Loved', \n", " 'all time faves', 'british i like', 'Jills Station', 'de todo mio favoritos', 'Faves', 'Fave', \n", " 'acclaimed music top 3000', 'top 2000', 'leapsandloved', 'Radiotsar approved', \n", "\n", " # great/awesome/blabla\n", " 'kick ass', 'wonderful', 'excellent', 'Great Lyricists', 'badass', 'awesomeness', 'great song', 'Awesome',\n", " 'cool', 'amazing', 'good', 'nice', 'sweet', 'best', 'FUCKING AWESOME', 'lovely', 'Good Stuff', 'brilliant',\n", " 'feel good', 'perfect', 'all the best', 'cute', 'the best', '<3', 'interesting', 'feelgood', 'pretty', \n", " 'i feel good', 'good shit', 'good music', 'good song', 'great songs', 'yeah', 'best song ever', 'wow', \n", " 'worship', 'makes me happy', 'ok', 'damned good', 'underrated', 'Perfection', 'super',\n", " \n", " # rating\n", " '1', '3', '4', '5', '4 Stars', '3 stars', '4 Star', '3 star', '3-star',\n", " \n", " # year\n", " '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', \n", " '2005', '2006', '2007', '2008', '2009', '2010', '00s', '10s', '1950s', '1960s', '1970s', '1980s', '1990s',\n", " '2000s', '20th Century', '21st century', \"50's\", '50s', \"60's\", '60s', '60s Gold', \"70's\", '70s', \"80's\", \n", " '80s', '80s Pop', '80s rock', \"90's\", '90s', '90s Rock',\n", " \n", " # descriptive\n", " 'songwriter', 'singer-songwriter', 'cover', 'covers', 'seen live', 'heard on Pandora', \n", " 'title is a full sentence', 'Retro', 'Miscellaneous', 'collection', 'billboard number ones', 'ost', \n", " 'cover song', 'singer songwriter', 'new', 'download', 'over 5 minutes long', 'Soundtracks', \n", " 'under two minutes', 'albums I own', 'cover songs', 'Radio', 'heard on last-fm', 'Soundtrack',\n", " \n", " # I don't know what you are talking about\n", " 'buy', 'lol', 'us', 'other', '2giveme5', 'i am a party girl here is my soundtrack', 'names', 'Tag', \n", " 'check out', 'f', 'test', 'out of our heads', 'me', 'I want back to the 80s', '9 lbs hammer', 'yes',\n", " 'streamable track wants', 'aitch', 'slgdmbestof', 'gotanygoodmusic', 'Brems tagg radio', 'gh 3',\n", " 'Sousaphonic AOTM 201102', 'fH Projex', 'GH10', 'Ion B radio', 'ik ben', 'quarkzangsun v1', \n", " ) " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "stag_to_tag = dict()\n", "stemmer = PorterStemmer()\n", "\n", "# we only pick the tags with >= 1000 counts, otherwise it's just too noisy\n", "# e.g. \"writing papers to pay for the college you have gotten into\" has 13 counts \n", "with open(uniq_tag_f, 'rb') as f:\n", " for line in f:\n", " try:\n", " tag, count = line.strip().split('\\t', 2)\n", " if int(count) >= 1000:\n", " if not tag in filtered_tags:\n", " stag = sanitize(tag)\n", " if stag in stag_to_tag:\n", " stag_to_tag[stag].append(tag)\n", " else:\n", " stag_to_tag[stag] = [tag]\n", " else:\n", " # since the file is ordered by count\n", " break\n", " except ValueError as e:\n", " print 'The following line raises the error:', e\n", " # there is one line with no tag information, but with less than 1000 counts\n", " print line" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "tags = sorted(stag_to_tag.keys())" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "tags" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "['131',\n", " 'acidjazz',\n", " 'acordgenial',\n", " 'acoust',\n", " 'acoustguitar',\n", " 'acoustrock',\n", " 'addict',\n", " 'adrienwayn',\n", " 'adultaltern',\n", " 'adultcontemporari',\n", " 'africa',\n", " 'african',\n", " 'aggress',\n", " 'albumrock',\n", " 'allboutguitar',\n", " 'altcountri',\n", " 'altern',\n", " 'alternmetal',\n", " 'alternpop',\n", " 'alternpunk',\n", " 'alternrock',\n", " 'altrock',\n", " 'ambient',\n", " 'american',\n", " 'americana',\n", " 'angri',\n", " 'anim',\n", " 'annymix',\n", " 'anthem',\n", " 'arenarock',\n", " 'artrock',\n", " 'asubtluseofvocalharmoni',\n", " 'atmospher',\n", " 'attitud',\n", " 'audioas',\n", " 'australian',\n", " 'avantgard',\n", " 'avocalcentraesthet',\n", " 'awesomguitarjam',\n", " 'ballad',\n", " 'basicbass',\n", " 'bass',\n", " 'beat',\n", " 'beauti',\n", " 'bebop',\n", " 'berlin',\n", " 'bigband',\n", " 'bigbeat',\n", " 'bittersweet',\n", " 'black',\n", " 'blackmetal',\n", " 'blingtacular',\n", " 'blue',\n", " 'bluegrass',\n", " 'bluerock',\n", " 'bluesrock',\n", " 'bossanova',\n", " 'bounci',\n", " 'brain2brain',\n", " 'brasil',\n", " 'brazil',\n", " 'brazilian',\n", " 'break',\n", " 'breakbeat',\n", " 'bremtaggradio',\n", " 'british',\n", " 'britishrock',\n", " 'britpop',\n", " 'britrock',\n", " 'brutal',\n", " 'brutaldeathmetal',\n", " 'california',\n", " 'calm',\n", " 'canada',\n", " 'canadian',\n", " 'catchi',\n", " 'celestnostalgia',\n", " 'celtic',\n", " 'chanson',\n", " 'chansonfrancais',\n", " 'chill',\n", " 'chillout',\n", " 'christian',\n", " 'christianrock',\n", " 'christma',\n", " 'cinemat',\n", " 'classic',\n", " 'classicblue',\n", " 'classiccountri',\n", " 'classicmetal',\n", " 'classicrock',\n", " 'classicsoul',\n", " 'closeharmoni',\n", " 'club',\n", " 'comedi',\n", " 'contemporarichristian',\n", " 'contemporariclassic',\n", " 'contemporarigospeltag',\n", " 'contemporarijazz',\n", " 'cooljazz',\n", " 'countri',\n", " 'countrirock',\n", " 'crazi',\n", " 'crossov',\n", " 'crow',\n", " 'danc',\n", " 'danceabl',\n", " 'dancehal',\n", " 'dancemania',\n", " 'dancepop',\n", " 'dancparti',\n", " 'dancpop',\n", " 'dark',\n", " 'darkambient',\n", " 'darkelectro',\n", " 'darkwav',\n", " 'death',\n", " 'deathcor',\n", " 'deathmetal',\n", " 'deep',\n", " 'deephous',\n", " 'deltablue',\n", " 'depress',\n", " 'detroit',\n", " 'deutsch',\n", " 'deutschrock',\n", " 'disco',\n", " 'diva',\n", " 'dj',\n", " 'dnb',\n", " 'doom',\n", " 'doommetal',\n", " 'doowop',\n", " 'downbeat',\n", " 'downtempo',\n", " 'dramat',\n", " 'dream',\n", " 'dreami',\n", " 'dreampop',\n", " 'drive',\n", " 'drjazzmrfunkmus',\n", " 'drone',\n", " 'drug',\n", " 'drum',\n", " 'drumandbass',\n", " 'dub',\n", " 'dubstep',\n", " 'dutch',\n", " 'easi',\n", " 'easilisten',\n", " 'eastcoastrap',\n", " 'ebm',\n", " 'eddi',\n", " 'eighti',\n", " 'electro',\n", " 'electroclash',\n", " 'electrohous',\n", " 'electron',\n", " 'electronica',\n", " 'electronicad',\n", " 'electropop',\n", " 'emo',\n", " 'emocor',\n", " 'emot',\n", " 'emus',\n", " 'energet',\n", " 'energi',\n", " 'england',\n", " 'english',\n", " 'epic',\n", " 'espanol',\n", " 'essenti',\n", " 'ether',\n", " 'ethnic',\n", " 'eurod',\n", " 'european',\n", " 'exceltune',\n", " 'experiment',\n", " 'experimentrock',\n", " 'extensvamp',\n", " 'fast',\n", " 'femal',\n", " 'femalartist',\n", " 'femalfrontmetal',\n", " 'femalsinger',\n", " 'femalvocal',\n", " 'femalvocalist',\n", " 'femalvoic',\n", " 'finland',\n", " 'finnish',\n", " 'finnishmetal',\n", " 'flamenco',\n", " 'flute',\n", " 'folk',\n", " 'folkmetal',\n", " 'folkrock',\n", " 'fon',\n", " 'franc',\n", " 'francai',\n", " 'freedom',\n", " 'french',\n", " 'friendsofthekingofrummelpop',\n", " 'fun',\n", " 'funk',\n", " 'funki',\n", " 'funni',\n", " 'fusion',\n", " 'gangstarap',\n", " 'garag',\n", " 'garagrock',\n", " 'geil',\n", " 'geniu',\n", " 'german',\n", " 'germani',\n", " 'girlpower',\n", " 'gitarrenunterricht',\n", " 'glam',\n", " 'glammetal',\n", " 'glamrock',\n", " 'glitch',\n", " 'goa',\n", " 'goldenoldi',\n", " 'goodbeat',\n", " 'goodmood',\n", " 'gorgeou',\n", " 'gospel',\n", " 'goth',\n", " 'gothic',\n", " 'gothicmetal',\n", " 'gothicrock',\n", " 'gothrock',\n", " 'great',\n", " 'greatlyric',\n", " 'grindcor',\n", " 'groov',\n", " 'groovi',\n", " 'grung',\n", " 'guiltipleasur',\n", " 'guitar',\n", " 'guitarhero',\n", " 'guitarsolo',\n", " 'guitarvirtuoso',\n", " 'gutelaun',\n", " 'hairmetal',\n", " 'halftonsinglclub',\n", " 'halloween',\n", " 'handclap',\n", " 'happi',\n", " 'hard',\n", " 'hardcor',\n", " 'hardcorpunk',\n", " 'hardrock',\n", " 'harmonica',\n", " 'haunt',\n", " 'heartbreak',\n", " 'heavi',\n", " 'heavimetal',\n", " 'highschool',\n", " 'hiphop',\n", " 'horn',\n", " 'hot',\n", " 'hous',\n", " 'humor',\n", " 'hypnot',\n", " 'idm',\n", " 'indi',\n", " 'indietronica',\n", " 'indifolk',\n", " 'indipop',\n", " 'indirock',\n", " 'industri',\n", " 'industrimetal',\n", " 'industrirock',\n", " 'inspir',\n", " 'instrument',\n", " 'instrumentjazztag',\n", " 'instrumentrock',\n", " 'intens',\n", " 'irish',\n", " 'italian',\n", " 'italiana',\n", " 'jam',\n", " 'jamaica',\n", " 'japanes',\n", " 'jazz',\n", " 'jazzfunk',\n", " 'jazzfusion',\n", " 'jazzi',\n", " 'jazzinstrument',\n", " 'jazzpiano',\n", " 'jazzrock',\n", " 'jazzvocal',\n", " 'karlsruh',\n", " 'lacrimaindark',\n", " 'latenight',\n", " 'latin',\n", " 'latinjazz',\n", " 'latino',\n", " 'latinpop',\n", " 'latinrock',\n", " 'legend',\n", " 'life',\n", " 'light',\n", " 'lined',\n", " 'live',\n", " 'lofi',\n", " 'london',\n", " 'loneliafterdusk',\n", " 'loud',\n", " 'loung',\n", " 'love',\n", " 'lovesong',\n", " 'ls',\n", " 'lush',\n", " 'lyric',\n", " 'magic',\n", " 'majorkeytonal',\n", " 'male',\n", " 'malesing',\n", " 'malevocal',\n", " 'malevocalist',\n", " 'masterpiec',\n", " 'medit',\n", " 'melanchol',\n", " 'melancholi',\n", " 'mellow',\n", " 'melod',\n", " 'melodblackmetal',\n", " 'meloddeathmetal',\n", " 'melodhardcor',\n", " 'melodmetal',\n", " 'melodrock',\n", " 'melodtranc',\n", " 'memori',\n", " 'metal',\n", " 'metalcor',\n", " 'metrodowntempo',\n", " 'metroelectronica',\n", " 'metrofunki',\n", " 'metrojazz',\n", " 'mid',\n", " 'mildrhythmicsyncop',\n", " 'minim',\n", " 'minimtechno',\n", " 'minorkeytonal',\n", " 'mod',\n", " 'modernrock',\n", " 'moodi',\n", " 'morn',\n", " 'motown',\n", " 'mpb',\n", " 'music',\n", " 'musicspirit',\n", " 'musictofallasleepto',\n", " 'neosoul',\n", " 'newage',\n", " 'newromant',\n", " 'newwave',\n", " 'newyork',\n", " 'night',\n", " 'ninjatune',\n", " 'nois',\n", " 'noisrock',\n", " 'northernsoul',\n", " 'norwegian',\n", " 'nostalg',\n", " 'nostalgia',\n", " 'nujazz',\n", " 'numet',\n", " 'numetal',\n", " 'nyc',\n", " 'oi',\n", " 'oldfavorit',\n", " 'oldi',\n", " 'oldschool',\n", " 'oldschoolsoul',\n", " 'oldskool',\n", " 'parti',\n", " 'partimusic',\n", " 'peac',\n", " 'piano',\n", " 'pianorock',\n", " 'play',\n", " 'poetri',\n", " 'polish',\n", " 'polit',\n", " 'pop',\n", " 'poplife',\n", " 'poppunk',\n", " 'poprock',\n", " 'popular',\n", " 'posit',\n", " 'postgrung',\n", " 'posthardcor',\n", " 'postpunk',\n", " 'postrock',\n", " 'power',\n", " 'powerballad',\n", " 'powermetal',\n", " 'powerpop',\n", " 'prda',\n", " 'prog',\n", " 'progress',\n", " 'progressdeathmetal',\n", " 'progresshous',\n", " 'progressiv',\n", " 'progressmetal',\n", " 'progressrock',\n", " 'progresstranc',\n", " 'progrock',\n", " 'protopunk',\n", " 'psychedel',\n", " 'psychedelrock',\n", " 'psychil',\n", " 'psychobilli',\n", " 'psytranc',\n", " 'punk',\n", " 'punkfavorit',\n", " 'punkrock',\n", " 'q3',\n", " 'quiet',\n", " 'quietstorm',\n", " 'quirki',\n", " 'rain',\n", " 'rainiday',\n", " 'randb',\n", " 'rap',\n", " 'rapcor',\n", " 'rave',\n", " 'rb',\n", " 'reflect',\n", " 'regga',\n", " 'relax',\n", " 'remix',\n", " 'repetitmelodphrase',\n", " 'rhythmandblue',\n", " 'rhythumandbluetag',\n", " 'rnb',\n", " 'rock',\n", " 'rockabilli',\n", " 'rockandroll',\n", " 'rockballad',\n", " 'rockenespanol',\n", " 'rockin',\n", " 'rockpop',\n", " 'rockroll',\n", " 'rocksteadi',\n", " 'romanc',\n", " 'romant',\n", " 'romantica',\n", " 'root',\n", " 'rootregga',\n", " 'sad',\n", " 'salsa',\n", " 'samba',\n", " 'sanfrancisco',\n", " 'sax',\n", " 'saxophon',\n", " 'scandinavian',\n", " 'scottish',\n", " 'screamo',\n", " 'seattl',\n", " 'sensual',\n", " 'sentiment',\n", " 'sex',\n", " 'sexi',\n", " 'shoegaz',\n", " 'silentintens',\n", " 'singalong',\n", " 'singer',\n", " 'sixti',\n", " 'ska',\n", " 'skapunk',\n", " 'sleek',\n", " 'sleep',\n", " 'slgdm',\n", " 'slordig',\n", " 'slow',\n", " 'slowjam',\n", " 'sludg',\n", " 'smooth',\n", " 'smoothjazz',\n", " 'soft',\n", " 'softrock',\n", " 'solx',\n", " 'somafm',\n", " 'sommer',\n", " 'sooth',\n", " 'soul',\n", " 'soulandrnbclassic',\n", " 'soultag',\n", " 'soundstorm',\n", " 'southernrock',\n", " 'space',\n", " 'spacerock',\n", " 'spanish',\n", " 'spanishrock',\n", " 'special',\n", " 'speed',\n", " 'speedmetal',\n", " 'spiritu',\n", " 'spokenword',\n", " 'spring',\n", " 'stoner',\n", " 'stonerrock',\n", " 'stonesoup',\n", " 'stream',\n", " 'string',\n", " 'summer',\n", " 'sunday',\n", " 'sunni',\n", " 'suomi',\n", " 'suomipop',\n", " 'suomirock',\n", " 'surf',\n", " 'sweden',\n", " 'swedish',\n", " 'swedishmetal',\n", " 'swing',\n", " 'symphonmetal',\n", " 'symphonrock',\n", " 'synth',\n", " 'synthpop',\n", " 'tantotempotast',\n", " 'techhous',\n", " 'technicdeathmetal',\n", " 'techno',\n", " 'temazo',\n", " 'texa',\n", " 'thrash',\n", " 'thrashmetal',\n", " 'torquemada',\n", " 'tranc',\n", " 'triphop',\n", " 'trippi',\n", " 'trumpet',\n", " 'twee',\n", " 'uk',\n", " 'underground',\n", " 'undergroundhiphop',\n", " 'upbeat',\n", " 'uplift',\n", " 'uplifttranc',\n", " 'urban',\n", " 'usa',\n", " 'vikemetal',\n", " 'violin',\n", " 'vocal',\n", " 'vocalhous',\n", " 'vocaljazz',\n", " 'vocaltranc',\n", " 'warm',\n", " 'weird',\n", " 'westcoast',\n", " 'winter',\n", " 'work',\n", " 'workout',\n", " 'world',\n", " 'worldfusion',\n", " 'worldmusic',\n", " 'xma']" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "import json\n", "\n", "with open('stag_to_tag.json') as f:\n", " stag_to_tag = json.load(f)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "stag_to_tag" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "{u'131': [u'131'],\n", " u'acidjazz': [u'acid jazz'],\n", " u'acordgenial': [u'acordes geniales'],\n", " u'acoust': [u'acoustic'],\n", " u'acoustguitar': [u'acoustic guitar'],\n", " u'acoustrock': [u'Acoustic Rock'],\n", " u'addict': [u'addictive'],\n", " u'adrienwayn': [u'adrien wayne'],\n", " u'adultaltern': [u'Adult Alternative'],\n", " u'adultcontemporari': [u'adult contemporary'],\n", " u'africa': [u'africa'],\n", " u'african': [u'african'],\n", " u'aggress': [u'aggressive'],\n", " u'albumrock': [u'album rock'],\n", " u'allboutguitar': [u'allboutguitar'],\n", " u'altcountri': [u'Alt-country', u'alt country'],\n", " u'altern': [u'alternative'],\n", " u'alternmetal': [u'alternative metal'],\n", " u'alternpop': [u'alternative pop'],\n", " u'alternpunk': [u'Alternative Punk'],\n", " u'alternrock': [u'alternative rock', u'Alternate Rock'],\n", " u'altrock': [u'alt rock'],\n", " u'ambient': [u'ambient'],\n", " u'american': [u'american'],\n", " u'americana': [u'americana'],\n", " u'angri': [u'angry'],\n", " u'anim': [u'animals'],\n", " u'annymix': [u'annymix'],\n", " u'anthem': [u'anthem'],\n", " u'arenarock': [u'arena rock'],\n", " u'artrock': [u'art rock'],\n", " u'asubtluseofvocalharmoni': [u'a subtle use of vocal harmony'],\n", " u'atmospher': [u'atmospheric', u'Atmosphere'],\n", " u'attitud': [u'attitude'],\n", " u'audioas': [u'audioase'],\n", " u'australian': [u'australian'],\n", " u'avantgard': [u'Avant-Garde', u'avantgarde'],\n", " u'avocalcentraesthet': [u'a vocal-centric aesthetic'],\n", " u'awesomguitarjam': [u'Awesome Guitar Jams'],\n", " u'ballad': [u'Ballad', u'ballads'],\n", " u'basicbass': [u'Basically Bass'],\n", " u'bass': [u'bass'],\n", " u'beat': [u'beats', u'beat'],\n", " u'beauti': [u'beautiful'],\n", " u'bebop': [u'bebop'],\n", " u'berlin': [u'Berlin'],\n", " u'bigband': [u'Big Band'],\n", " u'bigbeat': [u'big beat'],\n", " u'bittersweet': [u'Bittersweet'],\n", " u'black': [u'Black'],\n", " u'blackmetal': [u'black metal'],\n", " u'blingtacular': [u'blingtacular'],\n", " u'blue': [u'blues', u'blue'],\n", " u'bluegrass': [u'bluegrass'],\n", " u'bluerock': [u'blues rock'],\n", " u'bluesrock': [u'blues-rock'],\n", " u'bossanova': [u'Bossa Nova'],\n", " u'bounci': [u'bouncy'],\n", " u'brain2brain': [u'brain2brain'],\n", " u'brasil': [u'brasil'],\n", " u'brazil': [u'brazil'],\n", " u'brazilian': [u'brazilian'],\n", " u'break': [u'breaks'],\n", " u'breakbeat': [u'breakbeat'],\n", " u'bremtaggradio': [u'Brems Tagg radio'],\n", " u'british': [u'british'],\n", " u'britishrock': [u'british rock'],\n", " u'britpop': [u'britpop', u'brit pop'],\n", " u'britrock': [u'brit rock', u'Britrock'],\n", " u'brutal': [u'brutal'],\n", " u'brutaldeathmetal': [u'Brutal Death Metal'],\n", " u'california': [u'California'],\n", " u'calm': [u'calm', u'Calming'],\n", " u'canada': [u'canada'],\n", " u'canadian': [u'Canadian'],\n", " u'catchi': [u'catchy'],\n", " u'celestnostalgia': [u'celeste nostalgia'],\n", " u'celtic': [u'celtic'],\n", " u'chanson': [u'chanson'],\n", " u'chansonfrancais': [u'chanson francaise'],\n", " u'chill': [u'chill', u'chilled'],\n", " u'chillout': [u'chillout', u'chill out'],\n", " u'christian': [u'christian'],\n", " u'christianrock': [u'christian rock'],\n", " u'christma': [u'christmas'],\n", " u'cinemat': [u'cinematic'],\n", " u'classic': [u'classic', u'Classical', u'classics'],\n", " u'classicblue': [u'Classic Blues'],\n", " u'classiccountri': [u'classic country'],\n", " u'classicmetal': [u'classic metal'],\n", " u'classicrock': [u'classic rock'],\n", " u'classicsoul': [u'classic soul'],\n", " u'closeharmoni': [u'close harmony'],\n", " u'club': [u'club'],\n", " u'comedi': [u'comedy'],\n", " u'contemporarichristian': [u'contemporary christian'],\n", " u'contemporariclassic': [u'contemporary classical'],\n", " u'contemporarigospeltag': [u'Contemporary Gospel Tag'],\n", " u'contemporarijazz': [u'contemporary jazz'],\n", " u'cooljazz': [u'cool jazz'],\n", " u'countri': [u'country'],\n", " u'countrirock': [u'country rock'],\n", " u'crazi': [u'crazy'],\n", " u'crossov': [u'crossover'],\n", " u'crow': [u'crowe'],\n", " u'danc': [u'dance', u'dancing'],\n", " u'danceabl': [u'danceable'],\n", " u'dancehal': [u'dancehall'],\n", " u'dancemania': [u'Dancemania'],\n", " u'dancepop': [u'dance-pop'],\n", " u'dancparti': [u'dance party'],\n", " u'dancpop': [u'dance pop'],\n", " u'dark': [u'dark'],\n", " u'darkambient': [u'dark ambient'],\n", " u'darkelectro': [u'dark electro'],\n", " u'darkwav': [u'darkwave'],\n", " u'death': [u'death'],\n", " u'deathcor': [u'deathcore'],\n", " u'deathmetal': [u'death metal'],\n", " u'deep': [u'deep'],\n", " u'deephous': [u'deep house'],\n", " u'deltablue': [u'delta blues'],\n", " u'depress': [u'depressing'],\n", " u'detroit': [u'detroit'],\n", " u'deutsch': [u'deutsch'],\n", " u'deutschrock': [u'Deutschrock'],\n", " u'disco': [u'Disco'],\n", " u'diva': [u'diva'],\n", " u'dj': [u'dj'],\n", " u'dnb': [u'dnb'],\n", " u'doom': [u'Doom'],\n", " u'doommetal': [u'doom metal'],\n", " u'doowop': [u'doo wop'],\n", " u'downbeat': [u'downbeat'],\n", " u'downtempo': [u'downtempo'],\n", " u'dramat': [u'dramatic'],\n", " u'dream': [u'Dream'],\n", " u'dreami': [u'Dreamy'],\n", " u'dreampop': [u'dream pop'],\n", " u'drive': [u'Driving'],\n", " u'drjazzmrfunkmus': [u'drjazzmrfunkmusic'],\n", " u'drone': [u'drone'],\n", " u'drug': [u'drugs'],\n", " u'drum': [u'drums'],\n", " u'drumandbass': [u'Drum and bass', u'Drum n Bass'],\n", " u'dub': [u'dub'],\n", " u'dubstep': [u'dubstep'],\n", " u'dutch': [u'dutch'],\n", " u'easi': [u'easy'],\n", " u'easilisten': [u'easy listening'],\n", " u'eastcoastrap': [u'east coast rap'],\n", " u'ebm': [u'ebm'],\n", " u'eddi': [u'eddie'],\n", " u'eighti': [u'eighties'],\n", " u'electro': [u'electro'],\n", " u'electroclash': [u'Electroclash'],\n", " u'electrohous': [u'electro house'],\n", " u'electron': [u'electronic'],\n", " u'electronica': [u'electronica'],\n", " u'electronicad': [u'ElectronicaDance'],\n", " u'electropop': [u'electropop', u'Electro Pop', u'electro-pop'],\n", " u'emo': [u'emo'],\n", " u'emocor': [u'emocore'],\n", " u'emot': [u'emotional', u'emotive'],\n", " u'emus': [u'emusic'],\n", " u'energet': [u'Energetic'],\n", " u'energi': [u'energy'],\n", " u'england': [u'england'],\n", " u'english': [u'english'],\n", " u'epic': [u'epic'],\n", " u'espanol': [u'Espanol'],\n", " u'essenti': [u'essentials'],\n", " u'ether': [u'ethereal'],\n", " u'ethnic': [u'ethnic'],\n", " u'eurod': [u'eurodance'],\n", " u'european': [u'european'],\n", " u'exceltune': [u'Excellent tune'],\n", " u'experiment': [u'experimental'],\n", " u'experimentrock': [u'Experimental Rock'],\n", " u'extensvamp': [u'extensive vamping'],\n", " u'fast': [u'fast'],\n", " u'femal': [u'female'],\n", " u'femalartist': [u'female artists'],\n", " u'femalfrontmetal': [u'Female fronted metal'],\n", " u'femalsinger': [u'female singers'],\n", " u'femalvocal': [u'female vocals', u'female vocal'],\n", " u'femalvocalist': [u'female vocalists', u'female vocalist'],\n", " u'femalvoic': [u'Female Voices'],\n", " u'finland': [u'finland'],\n", " u'finnish': [u'finnish'],\n", " u'finnishmetal': [u'finnish metal'],\n", " u'flamenco': [u'Flamenco'],\n", " u'flute': [u'flute'],\n", " u'folk': [u'folk'],\n", " u'folkmetal': [u'folk metal'],\n", " u'folkrock': [u'folk rock', u'folk-rock'],\n", " u'fon': [u'fon'],\n", " u'franc': [u'france'],\n", " u'francai': [u'francais'],\n", " u'freedom': [u'freedom'],\n", " u'french': [u'french'],\n", " u'friendsofthekingofrummelpop': [u'friendsofthekingofrummelpop'],\n", " u'fun': [u'fun'],\n", " u'funk': [u'funk'],\n", " u'funki': [u'funky'],\n", " u'funni': [u'funny'],\n", " u'fusion': [u'Fusion'],\n", " u'gangstarap': [u'Gangsta Rap'],\n", " u'garag': [u'garage'],\n", " u'garagrock': [u'Garage Rock'],\n", " u'geil': [u'geil'],\n", " u'geniu': [u'genius'],\n", " u'german': [u'german'],\n", " u'germani': [u'germany'],\n", " u'girlpower': [u'girl power'],\n", " u'gitarrenunterricht': [u'Gitarrenunterricht'],\n", " u'glam': [u'glam'],\n", " u'glammetal': [u'Glam Metal'],\n", " u'glamrock': [u'glam rock'],\n", " u'glitch': [u'glitch'],\n", " u'goa': [u'goa'],\n", " u'goldenoldi': [u'golden oldies'],\n", " u'goodbeat': [u'good beat'],\n", " u'goodmood': [u'good mood'],\n", " u'gorgeou': [u'gorgeous'],\n", " u'gospel': [u'gospel'],\n", " u'goth': [u'goth'],\n", " u'gothic': [u'Gothic'],\n", " u'gothicmetal': [u'Gothic Metal'],\n", " u'gothicrock': [u'Gothic Rock'],\n", " u'gothrock': [u'goth rock'],\n", " u'great': [u'great'],\n", " u'greatlyric': [u'great lyrics'],\n", " u'grindcor': [u'grindcore'],\n", " u'groov': [u'groove'],\n", " u'groovi': [u'groovy'],\n", " u'grung': [u'Grunge'],\n", " u'guiltipleasur': [u'Guilty Pleasures', u'guilty pleasure'],\n", " u'guitar': [u'guitar'],\n", " u'guitarhero': [u'Guitar Hero'],\n", " u'guitarsolo': [u'Guitar Solo'],\n", " u'guitarvirtuoso': [u'guitar virtuoso'],\n", " u'gutelaun': [u'gute laune'],\n", " u'hairmetal': [u'hair metal'],\n", " u'halftonsinglclub': [u'halftoned singles club'],\n", " u'halloween': [u'halloween'],\n", " u'handclap': [u'handclaps'],\n", " u'happi': [u'happy'],\n", " u'hard': [u'Hard'],\n", " u'hardcor': [u'hardcore'],\n", " u'hardcorpunk': [u'hardcore punk'],\n", " u'hardrock': [u'hard rock'],\n", " u'harmonica': [u'harmonica'],\n", " u'haunt': [u'haunting'],\n", " u'heartbreak': [u'heartbreak'],\n", " u'heavi': [u'heavy'],\n", " u'heavimetal': [u'heavy metal'],\n", " u'highschool': [u'High School'],\n", " u'hiphop': [u'Hip-Hop', u'hip hop', u'hiphop'],\n", " u'horn': [u'horns'],\n", " u'hot': [u'hot'],\n", " u'hous': [u'House'],\n", " u'humor': [u'humor'],\n", " u'hypnot': [u'hypnotic'],\n", " u'idm': [u'idm'],\n", " u'indi': [u'indie'],\n", " u'indietronica': [u'indietronica'],\n", " u'indifolk': [u'indie folk'],\n", " u'indipop': [u'indie pop'],\n", " u'indirock': [u'indie rock'],\n", " u'industri': [u'industrial'],\n", " u'industrimetal': [u'industrial metal'],\n", " u'industrirock': [u'industrial rock'],\n", " u'inspir': [u'inspirational', u'inspiring'],\n", " u'instrument': [u'instrumental'],\n", " u'instrumentjazztag': [u'Instrumental Jazz Tag'],\n", " u'instrumentrock': [u'instrumental rock'],\n", " u'intens': [u'intense'],\n", " u'irish': [u'irish'],\n", " u'italian': [u'italian'],\n", " u'italiana': [u'italiana'],\n", " u'jam': [u'Jam'],\n", " u'jamaica': [u'jamaica'],\n", " u'japanes': [u'japanese'],\n", " u'jazz': [u'jazz'],\n", " u'jazzfunk': [u'jazz funk'],\n", " u'jazzfusion': [u'jazz fusion'],\n", " u'jazzi': [u'jazzy'],\n", " u'jazzinstrument': [u'jazz instrumental'],\n", " u'jazzpiano': [u'jazz piano'],\n", " u'jazzrock': [u'Jazz Rock'],\n", " u'jazzvocal': [u'jazz vocal'],\n", " u'karlsruh': [u'Karlsruhe'],\n", " u'lacrimaindark': [u'lacrimaindarkness'],\n", " u'latenight': [u'late night'],\n", " u'latin': [u'latin'],\n", " u'latinjazz': [u'latin jazz'],\n", " u'latino': [u'latino'],\n", " u'latinpop': [u'latin pop'],\n", " u'latinrock': [u'Latin Rock'],\n", " u'legend': [u'legend'],\n", " u'life': [u'life'],\n", " u'light': [u'light'],\n", " u'lined': [u'linedance'],\n", " u'live': [u'live'],\n", " u'lofi': [u'Lo-Fi'],\n", " u'london': [u'london'],\n", " u'loneliafterdusk': [u'loneliness after dusk'],\n", " u'loud': [u'loud'],\n", " u'loung': [u'lounge'],\n", " u'love': [u'Love'],\n", " u'lovesong': [u'love songs', u'love song', u'lovesongs'],\n", " u'ls': [u'ls'],\n", " u'lush': [u'lush'],\n", " u'lyric': [u'lyrics'],\n", " u'magic': [u'magic'],\n", " u'majorkeytonal': [u'major key tonality'],\n", " u'male': [u'male'],\n", " u'malesing': [u'malesinger'],\n", " u'malevocal': [u'male vocals', u'male vocal'],\n", " u'malevocalist': [u'male vocalists', u'male vocalist'],\n", " u'masterpiec': [u'Masterpiece'],\n", " u'medit': [u'Meditation'],\n", " u'melanchol': [u'melancholic'],\n", " u'melancholi': [u'melancholy'],\n", " u'mellow': [u'Mellow'],\n", " u'melod': [u'melodic'],\n", " u'melodblackmetal': [u'melodic black metal'],\n", " u'meloddeathmetal': [u'Melodic Death Metal'],\n", " u'melodhardcor': [u'melodic hardcore'],\n", " u'melodmetal': [u'melodic metal'],\n", " u'melodrock': [u'melodic rock'],\n", " u'melodtranc': [u'melodic trance'],\n", " u'memori': [u'memories'],\n", " u'metal': [u'metal', u'metallis'],\n", " u'metalcor': [u'metalcore'],\n", " u'metrodowntempo': [u'metro downtempo'],\n", " u'metroelectronica': [u'metro electronica'],\n", " u'metrofunki': [u'metro funky'],\n", " u'metrojazz': [u'Metro Jazz'],\n", " u'mid': [u'mid'],\n", " u'mildrhythmicsyncop': [u'mild rhythmic syncopation'],\n", " u'minim': [u'minimal'],\n", " u'minimtechno': [u'minimal techno'],\n", " u'minorkeytonal': [u'minor key tonality'],\n", " u'mod': [u'mod'],\n", " u'modernrock': [u'modern rock'],\n", " u'moodi': [u'moody'],\n", " u'morn': [u'morning'],\n", " u'motown': [u'motown'],\n", " u'mpb': [u'mpb'],\n", " u'music': [u'music'],\n", " u'musicspirit': [u'musicspirit'],\n", " u'musictofallasleepto': [u'music to fall asleep to'],\n", " u'neosoul': [u'Neo-Soul', u'Neo Soul'],\n", " u'newage': [u'new age'],\n", " u'newromant': [u'new romantic'],\n", " u'newwave': [u'new wave'],\n", " u'newyork': [u'new york'],\n", " u'night': [u'night'],\n", " u'ninjatune': [u'ninja tune'],\n", " u'nois': [u'noise'],\n", " u'noisrock': [u'noise rock'],\n", " u'northernsoul': [u'northern soul'],\n", " u'norwegian': [u'norwegian'],\n", " u'nostalg': [u'nostalgic'],\n", " u'nostalgia': [u'nostalgia'],\n", " u'nujazz': [u'nu jazz', u'nu-jazz'],\n", " u'numet': [u'Nu-metal'],\n", " u'numetal': [u'Nu Metal'],\n", " u'nyc': [u'NYC'],\n", " u'oi': [u'Oi'],\n", " u'oldfavorit': [u'old favorites'],\n", " u'oldi': [u'oldies'],\n", " u'oldschool': [u'old school'],\n", " u'oldschoolsoul': [u'Old School soul'],\n", " u'oldskool': [u'old skool'],\n", " u'parti': [u'party'],\n", " u'partimusic': [u'party music'],\n", " u'peac': [u'peaceful'],\n", " u'piano': [u'piano'],\n", " u'pianorock': [u'piano rock'],\n", " u'play': [u'Playful'],\n", " u'poetri': [u'poetry'],\n", " u'polish': [u'polish'],\n", " u'polit': [u'political'],\n", " u'pop': [u'pop'],\n", " u'poplife': [u'Pop Life'],\n", " u'poppunk': [u'pop punk', u'Pop-punk'],\n", " u'poprock': [u'pop rock', u'Pop-Rock', u'poprock'],\n", " u'popular': [u'popular'],\n", " u'posit': [u'positive'],\n", " u'postgrung': [u'post-grunge'],\n", " u'posthardcor': [u'post-hardcore', u'post hardcore'],\n", " u'postpunk': [u'post-punk', u'Post punk'],\n", " u'postrock': [u'post-rock', u'post rock'],\n", " u'power': [u'powerful', u'power'],\n", " u'powerballad': [u'Power ballad'],\n", " u'powermetal': [u'Power metal'],\n", " u'powerpop': [u'power pop', u'powerpop'],\n", " u'prda': [u'prda'],\n", " u'prog': [u'prog'],\n", " u'progress': [u'Progressive'],\n", " u'progressdeathmetal': [u'progressive death metal'],\n", " u'progresshous': [u'Progressive House'],\n", " u'progressiv': [u'progressiv'],\n", " u'progressmetal': [u'Progressive metal'],\n", " u'progressrock': [u'Progressive rock'],\n", " u'progresstranc': [u'progressive trance'],\n", " u'progrock': [u'prog rock'],\n", " u'protopunk': [u'proto-punk'],\n", " u'psychedel': [u'psychedelic'],\n", " u'psychedelrock': [u'Psychedelic Rock'],\n", " u'psychil': [u'psychill'],\n", " u'psychobilli': [u'psychobilly'],\n", " u'psytranc': [u'psytrance'],\n", " u'punk': [u'punk'],\n", " u'punkfavorit': [u'Punk Favorites'],\n", " u'punkrock': [u'punk rock', u'punkrock'],\n", " u'q3': [u'q3'],\n", " u'quiet': [u'quiet'],\n", " u'quietstorm': [u'quiet storm'],\n", " u'quirki': [u'quirky'],\n", " u'rain': [u'rain'],\n", " u'rainiday': [u'Rainy Day'],\n", " u'randb': [u'r&b', u'r and b'],\n", " u'rap': [u'rap'],\n", " u'rapcor': [u'rapcore'],\n", " u'rave': [u'Rave'],\n", " u'rb': [u'RB'],\n", " u'reflect': [u'Reflective'],\n", " u'regga': [u'reggae'],\n", " u'relax': [u'relax', u'relaxing', u'relaxed'],\n", " u'remix': [u'remix'],\n", " u'repetitmelodphrase': [u'repetitive melodic phrasing'],\n", " u'rhythmandblue': [u'rhythm and blues'],\n", " u'rhythumandbluetag': [u'rhythum and blues tag'],\n", " u'rnb': [u'rnb'],\n", " u'rock': [u'rock'],\n", " u'rockabilli': [u'rockabilly'],\n", " u'rockandroll': [u'rock n roll', u'Rock and Roll'],\n", " u'rockballad': [u'rock ballad'],\n", " u'rockenespanol': [u'Rock en Espanol'],\n", " u'rockin': [u'rockin'],\n", " u'rockpop': [u'RockPop'],\n", " u'rockroll': [u'Rock Roll'],\n", " u'rocksteadi': [u'rocksteady'],\n", " u'romanc': [u'romance'],\n", " u'romant': [u'romantic'],\n", " u'romantica': [u'Romantica'],\n", " u'root': [u'roots'],\n", " u'rootregga': [u'roots reggae'],\n", " u'sad': [u'sad'],\n", " u'salsa': [u'salsa'],\n", " u'samba': [u'samba'],\n", " u'sanfrancisco': [u'san francisco'],\n", " u'sax': [u'sax'],\n", " u'saxophon': [u'saxophone'],\n", " u'scandinavian': [u'scandinavian'],\n", " u'scottish': [u'Scottish'],\n", " u'screamo': [u'screamo'],\n", " u'seattl': [u'seattle'],\n", " u'sensual': [u'sensual'],\n", " u'sentiment': [u'Sentimental'],\n", " u'sex': [u'sex'],\n", " u'sexi': [u'sexy'],\n", " u'shoegaz': [u'shoegaze'],\n", " u'silentintens': [u'silent intensity'],\n", " u'singalong': [u'sing along', u'singalong'],\n", " u'singer': [u'singer'],\n", " u'sixti': [u'sixties'],\n", " u'ska': [u'ska'],\n", " u'skapunk': [u'ska punk'],\n", " u'sleek': [u'sleek'],\n", " u'sleep': [u'Sleep'],\n", " u'slgdm': [u'slgdm'],\n", " u'slordig': [u'slordig'],\n", " u'slow': [u'slow'],\n", " u'slowjam': [u'slow jams'],\n", " u'sludg': [u'Sludge'],\n", " u'smooth': [u'smooth'],\n", " u'smoothjazz': [u'Smooth Jazz'],\n", " u'soft': [u'soft'],\n", " u'softrock': [u'soft rock'],\n", " u'solx': [u'solx'],\n", " u'somafm': [u'somafm'],\n", " u'sommer': [u'Sommer'],\n", " u'sooth': [u'soothing'],\n", " u'soul': [u'soul', u'soulful'],\n", " u'soulandrnbclassic': [u'soul and rnb classics'],\n", " u'soultag': [u'soul tag'],\n", " u'soundstorm': [u'sound storm'],\n", " u'southernrock': [u'Southern Rock'],\n", " u'space': [u'space'],\n", " u'spacerock': [u'space rock'],\n", " u'spanish': [u'spanish'],\n", " u'spanishrock': [u'Spanish Rock'],\n", " u'special': [u'special'],\n", " u'speed': [u'speed'],\n", " u'speedmetal': [u'speed metal'],\n", " u'spiritu': [u'spiritual'],\n", " u'spokenword': [u'spoken word'],\n", " u'spring': [u'spring'],\n", " u'stoner': [u'stoner'],\n", " u'stonerrock': [u'Stoner Rock'],\n", " u'stonesoup': [u'stonesoup'],\n", " u'stream': [u'stream'],\n", " u'string': [u'strings'],\n", " u'summer': [u'summer'],\n", " u'sunday': [u'sunday'],\n", " u'sunni': [u'sunny'],\n", " u'suomi': [u'Suomi'],\n", " u'suomipop': [u'SuomiPop'],\n", " u'suomirock': [u'suomirock'],\n", " u'surf': [u'Surf'],\n", " u'sweden': [u'Sweden'],\n", " u'swedish': [u'swedish'],\n", " u'swedishmetal': [u'Swedish Metal'],\n", " u'swing': [u'swing'],\n", " u'symphonmetal': [u'symphonic metal'],\n", " u'symphonrock': [u'Symphonic Rock'],\n", " u'synth': [u'synth'],\n", " u'synthpop': [u'synthpop', u'synth pop'],\n", " u'tantotempotast': [u'tantotempotaste'],\n", " u'techhous': [u'tech house'],\n", " u'technicdeathmetal': [u'Technical Death Metal'],\n", " u'techno': [u'techno'],\n", " u'temazo': [u'temazo'],\n", " u'texa': [u'texas'],\n", " u'thrash': [u'thrash'],\n", " u'thrashmetal': [u'thrash metal'],\n", " u'torquemada': [u'Torquemada'],\n", " u'tranc': [u'trance'],\n", " u'triphop': [u'trip-hop', u'trip hop'],\n", " u'trippi': [u'trippy'],\n", " u'trumpet': [u'trumpet'],\n", " u'twee': [u'twee'],\n", " u'uk': [u'UK'],\n", " u'underground': [u'underground'],\n", " u'undergroundhiphop': [u'underground hip-hop', u'underground hip hop'],\n", " u'upbeat': [u'upbeat'],\n", " u'uplift': [u'Uplifting'],\n", " u'uplifttranc': [u'uplifting trance'],\n", " u'urban': [u'urban'],\n", " u'usa': [u'USA'],\n", " u'vikemetal': [u'viking metal'],\n", " u'violin': [u'violin'],\n", " u'vocal': [u'vocal', u'vocals', u'vocalization'],\n", " u'vocalhous': [u'vocal house'],\n", " u'vocaljazz': [u'vocal jazz'],\n", " u'vocaltranc': [u'vocal trance'],\n", " u'warm': [u'warm'],\n", " u'weird': [u'weird'],\n", " u'westcoast': [u'west coast'],\n", " u'winter': [u'winter'],\n", " u'work': [u'work'],\n", " u'workout': [u'Workout'],\n", " u'world': [u'world'],\n", " u'worldfusion': [u'world fusion'],\n", " u'worldmusic': [u'World Music'],\n", " u'xma': [u'xmas']}" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "voc_to_num = dict((tag, i) for (i, tag) in enumerate(tags))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "def getArtistTracks(cur, aid):\n", " cur.execute(\"SELECT track_id FROM songs WHERE artist_id='%s'\" % aid)\n", " for (track, ) in cur_md:\n", " yield track\n", " \n", " \n", "def getValidTrackTags(cur, track, tid, vocab, voc_to_num):\n", " cur.execute(\"SELECT tag, val FROM tid_tag WHERE tid = %d AND val > 0\" % tid[track])\n", " out = {}\n", " for (tag, val) in cur:\n", " stag = sanitize(vocab[tag-1])\n", " if stag not in voc_to_num:\n", " continue\n", " if voc_to_num[stag] in out: \n", " new_val = min(100, out[voc_to_num[stag]] + float(val))\n", " out[voc_to_num[stag]] = new_val\n", " else:\n", " out[voc_to_num[stag]] = float(val)\n", " return out\n", "\n", "\n", "def numberize(infile, outfile, cur_md, cur_td, tid, vocab, voc_to_num):\n", " with open(infile, 'rb') as fr, open(outfile, 'wb') as fw:\n", " for line in fr:\n", " aid = line.strip()\n", " for track in getArtistTracks(cur_md, aid):\n", " if track not in tid:\n", " continue\n", " out = getValidTrackTags(cur_td, track, tid, vocab, voc_to_num)\n", " if len(out) != 0:\n", " fw.write('%s\\t%s\\n' % (track, ' '.join('%d:%.1f' % pair for pair in out.items()))) " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "# turn the whole MSD tags to numbers\n", "with sqlite3.connect(md_dbfile) as conn_md, sqlite3.connect(tags_dbfile) as conn_td:\n", " \n", " cur_md = conn_md.cursor()\n", " cur_td = conn_td.cursor()\n", " \n", " # artists_train.txt and artists_test.txt can be obtained from \n", " # https://github.com/tbertinmahieux/MSongsDB/tree/master/Tasks_Demos/Tagging \n", " numberize('artists_train.txt', 'tracks_tag_train.num', cur_md, cur_td, tid, vocab, voc_to_num)\n", " numberize('artists_test.txt', 'tracks_tag_test.num', cur_md, cur_td, tid, vocab, voc_to_num)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "def densify_and_save(infile, ncol):\n", " with open(infile, 'rb') as fr:\n", " for line in fr:\n", " tmp = line.split('\\t', 2)\n", " tid = tmp[0].strip()\n", " tdir = os.path.join('vq_hist', '/'.join(tid[2:5]))\n", " # this folder should already exist\n", " assert os.path.exists(tdir)\n", " \n", " pairs = tmp[-1].strip().split()\n", " keyvals = [p.split(':') for p in pairs]\n", " keyvals = [(int(key), float(val)) for key, val in keyvals]\n", " row = np.zeros((ncol, ), dtype=np.int16)\n", " for (k, v) in keyvals:\n", " row[k] = v\n", " np.save(os.path.join(tdir, tid + '_BoT'), row)\n", " pass" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "densify_and_save('tracks_tag_train.num', len(tags))\n", "densify_and_save('tracks_tag_test.num', len(tags))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 37 } ], "metadata": {} } ] }