{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# import the graphlab\n", "import graphlab as gl" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\\Users\\Atul\\AppData\\Local\\Temp\\graphlab_server_1469128219.log.0\n", "INFO:graphlab.cython.cy_server:GraphLab Create v2.1 started. Logging: C:\\Users\\Atul\\AppData\\Local\\Temp\\graphlab_server_1469128219.log.0\n" ] }, { "data": { "text/html": [ "
Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\song_data.csv
" ], "text/plain": [ "Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\song_data.csv" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 100 lines in 4.06285 secs.
" ], "text/plain": [ "Parsing completed. Parsed 100 lines in 4.06285 secs." ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "This non-commercial license of GraphLab Create for academic use is assigned to atul9806@yahoo.in and will expire on December 09, 2016.\n", "------------------------------------------------------" ] }, { "data": { "text/html": [ "
Read 637410 lines. Lines per second: 192485
" ], "text/plain": [ "Read 637410 lines. Lines per second: 192485" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\song_data.csv
" ], "text/plain": [ "Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\song_data.csv" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 1000000 lines in 3.68689 secs.
" ], "text/plain": [ "Parsing completed. Parsed 1000000 lines in 3.68689 secs." ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Inferred types from first 100 line(s) of file as \n", "column_type_hints=[str,str,str,str,long]\n", "If parsing fails due to incorrect types, you can correct\n", "the inferred type list above and pass it to read_csv in\n", "the column_type_hints argument\n", "------------------------------------------------------\n" ] } ], "source": [ "# reading the csv file\n", "data = gl.SFrame.read_csv(\"C:\\learn\\ML\\ML00caseStudy\\week01Intro\\song_data.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(1000000, 5, 1000000)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# getting no of rows and columns\n", "data.num_rows(), data.num_columns() , len(data)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# saving the dataset\n", "data.save('songs')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# loading the dataset\n", "sdata = gl.load_sframe('songs')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyear
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat2003
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti1995
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke2006
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava2003
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic0
\n", "[5 rows x 5 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\n", "Rows: 5\n", "\n", "Data:\n", "+--------------------+-------------------+-------------------------------+\n", "| song_id | title | release |\n", "+--------------------+-------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever | Butter |\n", "| SOBNYVR12A8C13558C | Si Vos Querés | De Culo |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winte... |\n", "+--------------------+-------------------+-------------------------------+\n", "+------------------+------+\n", "| artist_name | year |\n", "+------------------+------+\n", "| Faster Pussy cat | 2003 |\n", "| Karkkiautomaatti | 1995 |\n", "| Hudson Mohawke | 2006 |\n", "| Yerba Brava | 2003 |\n", "| Der Mystic | 0 |\n", "+------------------+------+\n", "[5 rows x 5 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# head 5 rows\n", "sdata.head(5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyear
SOXQYIQ12A8C137FBBJago ChhadeoNaale Baba Lassi Pee GyaKuldeep Manak0
SOHODZI12A8C137BB3NovembaDub_Connected: electronic
music ...
Gabriel Le Mar0
SOLXGOR12A81C21EB7FaradayThe Trance Collection
Vol. 2 ...
Elude0
SOWXJXQ12AB0189F43Fernweh feat. Sektion
Kuchikäschtli ...
So Oder SoTexta2004
\n", "[4 rows x 5 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\n", "Rows: 4\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOXQYIQ12A8C137FBB | Jago Chhadeo |\n", "| SOHODZI12A8C137BB3 | Novemba |\n", "| SOLXGOR12A81C21EB7 | Faraday |\n", "| SOWXJXQ12AB0189F43 | Fernweh feat. Sektion Kuch... |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+----------------+------+\n", "| release | artist_name | year |\n", "+-------------------------------+----------------+------+\n", "| Naale Baba Lassi Pee Gya | Kuldeep Manak | 0 |\n", "| Dub_Connected: electronic ... | Gabriel Le Mar | 0 |\n", "| The Trance Collection Vol. 2 | Elude | 0 |\n", "| So Oder So | Texta | 2004 |\n", "+-------------------------------+----------------+------+\n", "[4 rows x 5 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# likewise tail 4 rows\n", "sdata.tail(4)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "([str, str, str, str, int],\n", " ['song_id', 'title', 'release', 'artist_name', 'year'])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# checking the column names and data type\n", "sdata.column_types(), sdata.column_names()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Modify the data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00
\n", "[1000000 rows x 6 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0 |\n", "| Butter | Hudson Mohawke | 2006 | 0 |\n", "| De Culo | Yerba Brava | 2003 | 0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |\n", "| Da Bomb | Kris Kross | 1993 | 0 |\n", "| Danny Boy | Joseph Locke | 0 | 0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... | 0 | 0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "[1000000 rows x 6 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." 
] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# creating a new column\n", "sdata['my_rating'] = 0\n", "sdata" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_age
14
6
17
14
-1989
-1989
-1989
4
-1989
-1989
\n", "[1000000 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tint\n", "\tsong_age\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0 |\n", "| Butter | Hudson Mohawke | 2006 | 0 |\n", "| De Culo | Yerba Brava | 2003 | 0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |\n", "| Da Bomb | Kris Kross | 1993 | 0 |\n", "| Danny Boy | Joseph Locke | 0 | 0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 0 | 0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+----------+\n", "| song_age |\n", "+----------+\n", "| 14 |\n", "| 6 |\n", "| 17 |\n", "| 14 |\n", "| -1989 |\n", "| -1989 |\n", "| -1989 |\n", "| 4 |\n", "| -1989 |\n", "| -1989 |\n", "+----------+\n", "[1000000 rows x 7 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# generating a new column called song_age, relative to my_age\n", "\n", "my_age = 1989\n", "\n", "sdata['song_age']=sdata['year']-my_age\n", "sdata" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_agetitle length
142
62
174
143
-19893
-19899
-19894
44
-19891
-19896
\n", "[1000000 rows x 8 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tint\n", "\tsong_age\tint\n", "\ttitle length\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0 |\n", "| Butter | Hudson Mohawke | 2006 | 0 |\n", "| De Culo | Yerba Brava | 2003 | 0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |\n", "| Da Bomb | Kris Kross | 1993 | 0 |\n", "| Danny Boy | Joseph Locke | 0 | 0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 0 | 0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+----------+--------------+\n", "| song_age | title length |\n", "+----------+--------------+\n", "| 14 | 2 |\n", "| 6 | 2 |\n", "| 17 | 4 |\n", "| 14 | 3 |\n", "| -1989 | 3 |\n", "| -1989 | 9 |\n", "| -1989 | 4 |\n", "| 4 | 4 |\n", "| -1989 | 1 |\n", "| -1989 | 6 |\n", "+----------+--------------+\n", "[1000000 rows x 8 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# generating another column title length\n", "sdata['title length'] = sdata['title'].apply(lambda x : len(x.split()))\n", "sdata" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_agetitle lengthhow_old_was_i
14214
626
17417
14314
-19893-1989
-19899-1989
-19894-1989
444
-19891-1989
-19896-1989
\n", "[1000000 rows x 9 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tint\n", "\tsong_age\tint\n", "\ttitle length\tint\n", "\thow_old_was_i\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0 |\n", "| Butter | Hudson Mohawke | 2006 | 0 |\n", "| De Culo | Yerba Brava | 2003 | 0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |\n", "| Da Bomb | Kris Kross | 1993 | 0 |\n", "| Danny Boy | Joseph Locke | 0 | 0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 0 | 0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+----------+--------------+---------------+\n", "| song_age | title length | how_old_was_i |\n", "+----------+--------------+---------------+\n", "| 14 | 2 | 14 |\n", "| 6 | 2 | 6 |\n", "| 17 | 4 | 17 |\n", "| 14 | 3 | 14 |\n", "| -1989 | 3 | -1989 |\n", "| -1989 | 9 | -1989 |\n", "| -1989 | 4 | -1989 |\n", "| 4 | 4 | 4 |\n", "| -1989 | 1 | -1989 |\n", "| -1989 | 6 | -1989 |\n", "+----------+--------------+---------------+\n", "[1000000 rows x 9 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# adding one more column\n", "sdata.add_column(sdata.select_column('year').apply(lambda x: x - my_age),name='how_old_was_i')\n", "sdata" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_agetitle lengthhow_old_was_icol1col2
1421420
62620
1741740
1431430
-19893-198930
-19899-198990
-19894-198940
44440
-19891-198910
-19896-198960
\n", "[1000000 rows x 11 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tint\n", "\tsong_age\tint\n", "\ttitle length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0 |\n", "| Butter | Hudson Mohawke | 2006 | 0 |\n", "| De Culo | Yerba Brava | 2003 | 0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |\n", "| Da Bomb | Kris Kross | 1993 | 0 |\n", "| Danny Boy | Joseph Locke | 0 | 0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 0 | 0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+----------+--------------+---------------+------+------+\n", "| song_age | title length | how_old_was_i | col1 | col2 |\n", "+----------+--------------+---------------+------+------+\n", "| 14 | 2 | 14 | 2 | 0 |\n", "| 6 | 2 | 6 | 2 | 0 |\n", "| 17 | 4 | 17 | 4 | 0 |\n", "| 14 | 3 | 14 | 3 | 0 |\n", "| -1989 | 3 | -1989 | 3 | 0 |\n", "| -1989 | 9 | -1989 | 9 | 0 |\n", "| -1989 | 4 | -1989 | 4 | 0 |\n", "| 4 | 4 | 4 | 4 | 0 |\n", "| -1989 | 1 | -1989 | 1 | 0 |\n", "| -1989 | 6 | -1989 | 6 | 0 |\n", "+----------+--------------+---------------+------+------+\n", "[1000000 rows x 11 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# can add multiple columns at a time\n", "sdata[['col1','col2']] = [sdata['title length'],sdata['my_rating']]\n", "sdata" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#adding some more\n", "#sdata[['col3','col4']] = [[3,4]] this is not allowed; assigned values should be SArray\n", "#sdata" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# deleting column\n", "del sdata['song_age']" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title_lengthhow_old_was_icol1col2
21420
2620
41740
31430
3-198930
9-198990
4-198940
4440
1-198910
6-198960
\n", "[1000000 rows x 10 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tint\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0 |\n", "| Butter | Hudson Mohawke | 2006 | 0 |\n", "| De Culo | Yerba Brava | 2003 | 0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |\n", "| Da Bomb | Kris Kross | 1993 | 0 |\n", "| Danny Boy | Joseph Locke | 0 | 0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 0 | 0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+--------------+---------------+------+------+\n", "| title_length | how_old_was_i | col1 | col2 |\n", "+--------------+---------------+------+------+\n", "| 2 | 14 | 2 | 0 |\n", "| 2 | 6 | 2 | 0 |\n", "| 4 | 17 | 4 | 0 |\n", "| 3 | 14 | 3 | 0 |\n", "| 3 | -1989 | 3 | 0 |\n", "| 9 | -1989 | 9 | 0 |\n", "| 4 | -1989 | 4 | 0 |\n", "| 4 | 4 | 4 | 0 |\n", "| 1 | -1989 | 1 | 0 |\n", "| 6 | -1989 | 6 | 0 |\n", "+--------------+---------------+------+------+\n", "[1000000 rows x 10 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# rename columns\n", "sdata.rename({'title length':'title_length'})\n", "sdata" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nametitle_length
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat2
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti2
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke4
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava3
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic3
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery9
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence4
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross4
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke1
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
6
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
my_ratingyearhow_old_was_icol1col2
020031420
01995620
020061740
020031430
00-198930
00-198990
00-198940
01993440
00-198910
00-198960
\n", "[1000000 rows x 10 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\ttitle_length\tint\n", "\tmy_rating\tint\n", "\tyear\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+--------------+\n", "| release | artist_name | title_length |\n", "+-------------------------------+-------------------------------+--------------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2 |\n", "| Karkuteillä | Karkkiautomaatti | 2 |\n", "| Butter | Hudson Mohawke | 4 |\n", "| De Culo | Yerba Brava | 3 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 3 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 9 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 4 |\n", "| Da Bomb | Kris Kross | 4 |\n", "| Danny Boy | Joseph Locke | 1 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 6 |\n", "+-------------------------------+-------------------------------+--------------+\n", "+-----------+------+---------------+------+------+\n", "| my_rating | year | how_old_was_i | col1 | col2 |\n", "+-----------+------+---------------+------+------+\n", "| 0 | 2003 | 14 | 2 | 0 |\n", "| 0 | 1995 | 6 | 2 | 0 |\n", "| 0 | 2006 | 17 | 4 | 0 |\n", "| 0 | 2003 | 14 | 3 | 0 |\n", "| 0 | 0 | -1989 | 3 | 0 |\n", "| 0 | 0 | -1989 | 9 | 0 |\n", "| 0 | 0 | -1989 | 4 | 0 |\n", "| 0 | 1993 | 4 | 4 | 0 |\n", "| 0 | 0 | -1989 | 1 | 0 |\n", "| 0 | 0 | -1989 | 6 | 0 |\n", "+-----------+------+---------------+------+------+\n", "[1000000 rows x 10 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# swap columns location\n", "sdata.swap_columns('year','title_length')\n", "sdata" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "(['song_id',\n", " 'title',\n", " 'release',\n", " 'artist_name',\n", " 'title_length',\n", " 'my_rating',\n", " 'year',\n", " 'how_old_was_i',\n", " 'col1',\n", " 'col2'],\n", " [str, str, str, str, int, int, int, int, int, int])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# change the column data type\n", "sdata.column_names(),sdata.column_types()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[str, str, str, str, int, float, int, int, int, int]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sdata['my_rating'] = sdata['my_rating'].astype(float)\n", "sdata.column_types()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { 
"text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030.0
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950.0
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060.0
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030.0
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der Mystic00.0
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David Montgomery00.0
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / Turbulence00.0
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930.0
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph Locke00.0
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
00.0
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title_lengthhow_old_was_icol1col2
21420
2620
41740
31430
3-198930
9-198990
4-198940
4440
1-198910
6-198960
\n", "[1000000 rows x 10 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0.0 |\n", "| Butter | Hudson Mohawke | 2006 | 0.0 |\n", "| De Culo | Yerba Brava | 2003 | 0.0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | 0 | 0.0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | 0 | 0.0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0.0 |\n", "| Da Bomb | Kris Kross | 1993 | 0.0 |\n", "| Danny Boy | Joseph Locke | 0 | 0.0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| 0 | 0.0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+--------------+---------------+------+------+\n", "| title_length | how_old_was_i | col1 | col2 |\n", "+--------------+---------------+------+------+\n", "| 2 | 14 | 2 | 0 |\n", "| 2 | 6 | 2 | 0 |\n", "| 4 | 17 | 4 | 0 |\n", "| 3 | 14 | 3 | 0 |\n", "| 3 | -1989 | 3 | 0 |\n", "| 9 | -1989 | 9 | 0 |\n", "| 4 | -1989 | 4 | 0 |\n", "| 4 | 4 | 4 | 0 |\n", "| 1 | -1989 | 1 | 0 |\n", "| 6 | -1989 | 6 | 0 |\n", "+--------------+---------------+------+------+\n", "[1000000 rows x 10 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# swap column location\n", "sdata.swap_columns('title_length','year')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_ratingtitle_length
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030.02
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950.02
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060.04
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
how_old_was_icol1col2no_of_a
14205
6208
17401
\n", "[3 rows x 11 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\n", "Rows: 3\n", "\n", "Data:\n", "+--------------------+-------------------+-----------------------+\n", "| song_id | title | release |\n", "+--------------------+-------------------+-----------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever | Butter |\n", "+--------------------+-------------------+-----------------------+\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "| artist_name | year | my_rating | title_length | how_old_was_i | col1 | col2 |\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "| Faster Pussy cat | 2003 | 0.0 | 2 | 14 | 2 | 0 |\n", "| Karkkiautomaatti | 1995 | 0.0 | 2 | 6 | 2 | 0 |\n", "| Hudson Mohawke | 2006 | 0.0 | 4 | 17 | 4 | 0 |\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "+---------+\n", "| no_of_a |\n", "+---------+\n", "| 5 |\n", "| 8 |\n", "| 1 |\n", "+---------+\n", "[3 rows x 11 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# let's try to calculate how many 'a' are there in 'title', 'release' & 'artist_name' column\n", "sdata.add_column(sdata['title', 'release' , 'artist_name'].apply(lambda row:sum(word.count('a') for word in row.values())),'no_of_a')\n", "sdata.head(3)\n" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Checking Missing values" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearCount
19875125
20111
199815858
19907258
19918650
195084
\n", "[6 rows x 2 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tyear\tint\n", "\tCount\tint\n", "\n", "Rows: 6\n", "\n", "Data:\n", "+------+-------+\n", "| year | Count |\n", "+------+-------+\n", "| 1987 | 5125 |\n", "| 2011 | 1 |\n", "| 1998 | 15858 |\n", "| 1990 | 7258 |\n", "| 1991 | 8650 |\n", "| 1950 | 84 |\n", "+------+-------+\n", "[6 rows x 2 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# checking the missing year\n", "year_count = sdata.groupby('year', gl.aggregate.COUNT)\n", "year_count.head(6)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No of unique years : 90\n" ] } ], "source": [ "# no of unique years\n", "print \"No of unique years :\", str(len(year_count))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "no of invalid year count \n" ] }, { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearCount
0484424
\n", "[1 rows x 2 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tyear\tint\n", "\tCount\tint\n", "\n", "Rows: 1\n", "\n", "Data:\n", "+------+--------+\n", "| year | Count |\n", "+------+--------+\n", "| 0 | 484424 |\n", "+------+--------+\n", "[1 rows x 2 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print \"no of invalid year count \"\n", "year_count.topk('year', reverse=True, k=1)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_ratingtitle_length
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030.02
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950.02
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060.04
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030.03
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der MysticNone0.03
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
how_old_was_icol1col2no_of_a
14205
6208
17401
14303
-1989302
\n", "[5 rows x 11 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\n", "Rows: 5\n", "\n", "Data:\n", "+--------------------+-------------------+-------------------------------+\n", "| song_id | title | release |\n", "+--------------------+-------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever | Butter |\n", "| SOBNYVR12A8C13558C | Si Vos Querés | De Culo |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winte... |\n", "+--------------------+-------------------+-------------------------------+\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "| artist_name | year | my_rating | title_length | how_old_was_i | col1 | col2 |\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "| Faster Pussy cat | 2003 | 0.0 | 2 | 14 | 2 | 0 |\n", "| Karkkiautomaatti | 1995 | 0.0 | 2 | 6 | 2 | 0 |\n", "| Hudson Mohawke | 2006 | 0.0 | 4 | 17 | 4 | 0 |\n", "| Yerba Brava | 2003 | 0.0 | 3 | 14 | 3 | 0 |\n", "| Der Mystic | None | 0.0 | 3 | -1989 | 3 | 0 |\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "+---------+\n", "| no_of_a |\n", "+---------+\n", "| 5 |\n", "| 8 |\n", "| 1 |\n", "| 3 |\n", "| 2 |\n", "+---------+\n", "[5 rows x 11 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# year 0 is an invalid value, so it is better to convert it to None\n", "sdata['year'] = sdata['year'].apply(lambda x : None if x==0 else x)\n", "sdata.head(5)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ 
{ "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_ratingtitle_length
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030.02
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950.02
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060.04
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030.03
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der MysticNone0.03
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
how_old_was_icol1col2no_of_a
14205
6208
17401
14303
None302
\n", "[5 rows x 11 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\n", "Rows: 5\n", "\n", "Data:\n", "+--------------------+-------------------+-------------------------------+\n", "| song_id | title | release |\n", "+--------------------+-------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever | Butter |\n", "| SOBNYVR12A8C13558C | Si Vos Querés | De Culo |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winte... |\n", "+--------------------+-------------------+-------------------------------+\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "| artist_name | year | my_rating | title_length | how_old_was_i | col1 | col2 |\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "| Faster Pussy cat | 2003 | 0.0 | 2 | 14 | 2 | 0 |\n", "| Karkkiautomaatti | 1995 | 0.0 | 2 | 6 | 2 | 0 |\n", "| Hudson Mohawke | 2006 | 0.0 | 4 | 17 | 4 | 0 |\n", "| Yerba Brava | 2003 | 0.0 | 3 | 14 | 3 | 0 |\n", "| Der Mystic | None | 0.0 | 3 | None | 3 | 0 |\n", "+------------------+------+-----------+--------------+---------------+------+------+\n", "+---------+\n", "| no_of_a |\n", "+---------+\n", "| 5 |\n", "| 8 |\n", "| 1 |\n", "| 3 |\n", "| 2 |\n", "+---------+\n", "[5 rows x 11 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# now I have to fix the values of the 'how_old_was_i' column as well\n", "# I can do this in two ways: either recompute it by subtraction (year - born_year),\n", "# or use the same apply function and return None if x < 0\n", "sdata['how_old_was_i'] = 
sdata['year'].apply(lambda x : None if x is None else x-my_age)\n", "sdata.head(5)\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "515576\n" ] } ], "source": [ "# check the no of songs which have valid years\n", "print len(sdata[sdata['year']>0])" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1L, 1L, 1L, 1L, None, None, None, 1L, None, None, 1L, 1L, None, 1L, None, 1L, 1L, 1L, 1L, None, None, None, 1L, 1L, None, None, 1L, 1L, None, None, None, None, None, 1L, 1L, 1L, None, 1L, None, 1L, None, 1L, None, 1L, 1L, 1L, None, 1L, None, None, None, None, 1L, 1L, 1L, None, None, 1L, None, None, 1L, 1L, None, 1L, 1L, 1L, None, None, None, 1L, None, None, 1L, None, 1L, 1L, None, None, None, None, 1L, None, None, 1L, 1L, None, 1L, None, 1L, None, None, None, 1L, 1L, 1L, 1L, None, 1L, 1L, 1L, ... 
]\n" ] } ], "source": [ "tmp = sdata['year']>0\n", "print tmp" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean: 3.369894\n", "std: 2.17226527587\n", "var: 4.71873642876\n", "min: 0\n", "max: 47\n", "sum: 3369894\n", "number of non-zero entries: 999985\n" ] } ], "source": [ "# Look at lots of descriptive statistics of title_length\n", "print \"mean: \" + str(sdata['title_length'].mean())\n", "print \"std: \" + str(sdata['title_length'].std())\n", "print \"var: \" + str(sdata['title_length'].var())\n", "print \"min: \" + str(sdata['title_length'].min())\n", "print \"max: \" + str(sdata['title_length'].max())\n", "print \"sum: \" + str(sdata['title_length'].sum())\n", "print \"number of non-zero entries: \" + str(sdata['title_length'].nnz())" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "+--------------------+---------------+----------+\n", "| item | value | is exact |\n", "+--------------------+---------------+----------+\n", "| Length | 1000000 | Yes |\n", "| Min | 0.0 | Yes |\n", "| Max | 47.0 | Yes |\n", "| Mean | 3.369894 | Yes |\n", "| Sum | 3369894.0 | Yes |\n", "| Variance | 4.71873642876 | Yes |\n", "| Standard Deviation | 2.17226527587 | Yes |\n", "| # Missing Values | 0 | Yes |\n", "| # unique values | 44 | No |\n", "+--------------------+---------------+----------+\n", "\n", "Most frequent items:\n", "+-------+--------+--------+--------+--------+-------+-------+-------+-------+\n", "| value | 2 | 3 | 1 | 4 | 5 | 6 | 7 | 8 |\n", "+-------+--------+--------+--------+--------+-------+-------+-------+-------+\n", "| count | 241587 | 217674 | 164124 | 152569 | 92148 | 54674 | 30777 | 17826 |\n", "+-------+--------+--------+--------+--------+-------+-------+-------+-------+\n", "+-------+------+\n", "| 9 | 10 |\n", "+-------+------+\n", 
"| 10465 | 6633 |\n", "+-------+------+\n", "\n", "Quantiles: \n", "+-----+-----+-----+-----+-----+-----+-----+------+------+\n", "| 0% | 1% | 5% | 25% | 50% | 75% | 95% | 99% | 100% |\n", "+-----+-----+-----+-----+-----+-----+-----+------+------+\n", "| 0.0 | 1.0 | 1.0 | 2.0 | 3.0 | 4.0 | 7.0 | 11.0 | 47.0 |\n", "+-----+-----+-----+-----+-----+-----+-----+------+------+\n", "\n" ] } ], "source": [ "approx_sketch = sdata['title_length'].sketch_summary()\n", "print approx_sketch" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOAALOO12AC468C4ED | Resolution Island Suite I)... |\n", "| SOSEWOR12AB018BDF3 | Guayacan Mix: Amor Traicio... |\n", "| SOEBPPZ12AB0183730 | Son Of Scheherazade: Pt.1-... |\n", "| SODRVPW12A8C13DDCB | And by our own hand did ev... |\n", "| SODEYRT12A8AE47972 | Any Place I Hang My Hat is... |\n", "| SOEICJI12AC3DFAAD4 | Throw Away Comedy Medley: ... |\n", "| SORHPYP12AB017C661 | Happenings' Medley: Oh! Th... |\n", "| SOKNNWV12AB017B473 | If I Had My Way/Irish Rose... |\n", "| SOWKFQF12AAA8C85E4 | Manitoba Ne Répond Plus (C... |\n", "| SOGFUVA12A58A773AB | They Call Me Rock 'N' Roll... |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+--------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+--------------------------+------+-----------+\n", "| Allegory Of Hearing | Roy Montgomery | None | 0.0 |\n", "| Como en un baile | Guayacan | None | 0.0 |\n", "| British Tour '76 | Renaissance | None | 0.0 |\n", "| Every Red Heart Shines Tow... | Red Sparowes | 2006 | 0.0 |\n", "| Too Marvelous For Words - ... 
| Lee Lessack | None | 0.0 |\n", "| Live At The Sands | Dean Martin | None | 0.0 |\n", "| The Cat & The Fiddle - 66 ... | The Mother Goose Singers | None | 0.0 |\n", "| 1999 International Barbers... | Swing City | None | 0.0 |\n", "| Manitoba Ne Répond Plus | Gérard Manset | 2008 | 0.0 |\n", "| Motor City Connection | Brownsville Station | 1975 | 0.0 |\n", "+-------------------------------+--------------------------+------+-----------+\n", "+--------------+---------------+------+------+---------+\n", "| title_length | how_old_was_i | col1 | col2 | no_of_a |\n", "+--------------+---------------+------+------+---------+\n", "| 47 | None | 47 | 0 | 10 |\n", "| 46 | None | 46 | 0 | 22 |\n", "| 46 | None | 46 | 0 | 17 |\n", "| 45 | 17 | 45 | 0 | 16 |\n", "| 44 | None | 44 | 0 | 14 |\n", "| 44 | None | 44 | 0 | 14 |\n", "| 40 | None | 40 | 0 | 6 |\n", "| 40 | None | 40 | 0 | 21 |\n", "| 39 | 19 | 39 | 0 | 21 |\n", "| 39 | -14 | 39 | 0 | 9 |\n", "+--------------+---------------+------+------+---------+\n", "[10 rows x 11 columns]\n", "\n" ] } ], "source": [ "# lets check which songs are having largest and smallest length\n", "top_title_length = sdata.topk('title_length')\n", "print top_title_length" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+-------+-------------------------------+----------------+\n", "| song_id | title | release | artist_name |\n", "+--------------------+-------+-------------------------------+----------------+\n", "| SOVICLT12A58A7C4D0 | | Of Flies And Men | Milton |\n", "| SOAGRAA12AB018D567 | | Puce de luxe | Sébastien Roch |\n", "| SOAUTVB12AB018AFF0 | | Puce de luxe | Sébastien Roch |\n", "| SOMPGYE12AB018AFE0 | | Puce de luxe | Sébastien Roch |\n", "| SOVHAZH12AB018D5B3 | | Puce de luxe | Sébastien Roch |\n", "| SOECGBQ12AB018D582 | | Puce de luxe | Sébastien Roch |\n", "| SOFDILP12AC960A4EF | | Recupera tus 
Clásicos - Fobia | Fobia |\n", "| SOSFNKO12AB018D5BB | | Puce de luxe | Sébastien Roch |\n", "| SOZDBDL12AB018AFFF | | Puce de luxe | Sébastien Roch |\n", "| SOQUGMS12AB018B01D | | Puce de luxe | Sébastien Roch |\n", "+--------------------+-------+-------------------------------+----------------+\n", "+------+-----------+--------------+---------------+------+------+---------+\n", "| year | my_rating | title_length | how_old_was_i | col1 | col2 | no_of_a |\n", "+------+-----------+--------------+---------------+------+------+---------+\n", "| None | 0.0 | 0 | None | 0 | 0 | 0 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 3 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "| None | 0.0 | 0 | None | 0 | 0 | 1 |\n", "+------+-----------+--------------+---------------+------+------+---------+\n", "[10 rows x 11 columns]\n", "\n" ] } ], "source": [ "# what about lowest\n", "lowest_title_lenght = sdata.topk('title_length', reverse=True)\n", "print lowest_title_lenght" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+-------+---------------------+----------------+------+-----------+\n", "| song_id | title | release | artist_name | year | my_rating |\n", "+--------------------+-------+---------------------+----------------+------+-----------+\n", "| SOVICLT12A58A7C4D0 | | Of Flies And Men | Milton | None | 0.0 |\n", "| SOAGRAA12AB018D567 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOOAFJX12AB018A028 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOLDTFD12AB018AFE6 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOAUTVB12AB018AFF0 | | Puce de luxe | Sébastien Roch | None 
| 0.0 |\n", "| SOMPGYE12AB018AFE0 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOVHAZH12AB018D5B3 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOECGBQ12AB018D582 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOTJWHR12AB018D5A0 | | Puce de luxe | Sébastien Roch | None | 0.0 |\n", "| SOBDDHR12AB01888A6 | | Grand Glam Outtakes | Remute | None | 0.0 |\n", "+--------------------+-------+---------------------+----------------+------+-----------+\n", "+--------------+---------------+------+------+---------+\n", "| title_length | how_old_was_i | col1 | col2 | no_of_a |\n", "+--------------+---------------+------+------+---------+\n", "| 0 | None | 0 | 0 | 0 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 1 |\n", "| 0 | None | 0 | 0 | 3 |\n", "+--------------+---------------+------+------+---------+\n", "[15 rows x 11 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n" ] } ], "source": [ "# lowest 15\n", "# what about lowest\n", "lowest_title_lenght = sdata.topk('title_length', reverse=True, k =15)\n", "print lowest_title_lenght" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(False, True)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "before_i_was_born = sdata['how_old_was_i'] < 0\n", "before_i_was_born.all(), before_i_was_born.any()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
artist_namereleaseno_of_songs_in_album
FannyFirst Time In A Long
Time: The Reprise ...
85
Bernard HerrmannThe Twilight Zone81
Spanky & Our GangThe Complete Mercury
Recordings ...
75
The Smashing PumpkinsRarities & B-Sides72
Big StarKeep An Eye On The Sky71
Jacques DutroncIntégrale Les Cactus69
The Stooges1970: The Complete Fun
House Sessions ...
67
LullMoments64
Willie ClancyWillie Clancy The Gold
Ring ...
61
Jack DangersForbidden Planet Explored
/ Sci-Fi Sound Effects ...
60
\n", "[10 rows x 3 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tartist_name\tstr\n", "\trelease\tstr\n", "\tno_of_songs_in_album\tint\n", "\n", "Rows: 10\n", "\n", "Data:\n", "+-----------------------+-------------------------------+----------------------+\n", "| artist_name | release | no_of_songs_in_album |\n", "+-----------------------+-------------------------------+----------------------+\n", "| Fanny | First Time In A Long Time:... | 85 |\n", "| Bernard Herrmann | The Twilight Zone | 81 |\n", "| Spanky & Our Gang | The Complete Mercury Recor... | 75 |\n", "| The Smashing Pumpkins | Rarities & B-Sides | 72 |\n", "| Big Star | Keep An Eye On The Sky | 71 |\n", "| Jacques Dutronc | Intégrale Les Cactus | 69 |\n", "| The Stooges | 1970: The Complete Fun Hou... | 67 |\n", "| Lull | Moments | 64 |\n", "| Willie Clancy | Willie Clancy The Gold Ring | 61 |\n", "| Jack Dangers | Forbidden Planet Explored ... | 60 |\n", "+-----------------------+-------------------------------+----------------------+\n", "[10 rows x 3 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# count the songs in each album and display the top albums by number of songs\n", "sdata.groupby(['artist_name','release'], {'no_of_songs_in_album':gl.aggregate.COUNT} ).topk('no_of_songs_in_album')" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# this will download the 118 MB file\n", "#usage_data = gl.SFrame.read_csv(\"https://static.turi.com/datasets/millionsong/10000.txt\", header=False, delimiter='\\t', column_type_hints={'X3':int})\n", "#usage_data.rename({'X1':'user_id', 'X2':'song_id', 'X3':'listen_count'})" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\10000.txt
" ], "text/plain": [ "Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\10000.txt" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 100 lines in 3.04661 secs.
" ], "text/plain": [ "Parsing completed. Parsed 100 lines in 3.04661 secs." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Read 844838 lines. Lines per second: 265452
" ], "text/plain": [ "Read 844838 lines. Lines per second: 265452" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\10000.txt
" ], "text/plain": [ "Finished parsing file C:\\learn\\ML\\ML00caseStudy\\week01Intro\\10000.txt" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 2000000 lines in 4.35327 secs.
" ], "text/plain": [ "Parsing completed. Parsed 2000000 lines in 4.35327 secs." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idsong_idlisten_count
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOAKIMP12A8C1309951
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBBMDR12A8C13253B2
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBXHDL12A81C204C01
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBYHAJ12A6701BF1D1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODACBL12A8C13C2731
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODDNQT12A6D4F5F7E5
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODXRTY12AB0180F3B1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOFGUAY12AB017B0A81
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOFRQTD12A81C233C01
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOHQWYZ12A6D4FA7011
\n", "[2000000 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tstr\n", "\tsong_id\tstr\n", "\tlisten_count\tint\n", "\n", "Rows: 2000000\n", "\n", "Data:\n", "+-------------------------------+--------------------+--------------+\n", "| user_id | song_id | listen_count |\n", "+-------------------------------+--------------------+--------------+\n", "| b80344d063b5ccb3212f76538f... | SOAKIMP12A8C130995 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOBBMDR12A8C13253B | 2 |\n", "| b80344d063b5ccb3212f76538f... | SOBXHDL12A81C204C0 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOBYHAJ12A6701BF1D | 1 |\n", "| b80344d063b5ccb3212f76538f... | SODACBL12A8C13C273 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SODDNQT12A6D4F5F7E | 5 |\n", "| b80344d063b5ccb3212f76538f... | SODXRTY12AB0180F3B | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOFGUAY12AB017B0A8 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOFRQTD12A81C233C0 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOHQWYZ12A6D4FA701 | 1 |\n", "+-------------------------------+--------------------+--------------+\n", "[2000000 rows x 3 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." 
] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the data\n", "usage_data = gl.SFrame.read_csv(\"C:/learn/ML/ML00caseStudy/week01Intro/10000.txt\", header=False, delimiter=\"\\t\", column_type_hints={'X3':int})\n", "usage_data.rename({'X1':'user_id', 'X2':'song_id', 'X3':'listen_count'})" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# saving this data frame\n", "usage_data.save('usage_data')\n", "# loading the data\n", "usage_data = gl.load_sframe('usage_data')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "76353\n" ] } ], "source": [ "# find out the unique users\n", "print len(usage_data['user_id'].unique())" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQNUHJ12A6D4F9E19Let It All Out (Album
Version) ...
MMHMMRelient K20040.0
SOADZOM12A8C13E446The Blonde Lead The Blind
(Album Version) ...
Somewhere In The BetweenStreetlight Manifesto20070.0
SOKUTMT12A58A77CFBGive Until There's
Nothing Left ...
Five Score And Seven
Years Ago ...
Relient K20070.0
SOIONAH12A58A76FD1We Are The Few (Album
Version) ...
Everything Goes NumbStreetlight Manifesto20030.0
SOUUYHK12A6D4F9E16I So Hate Consequences
(Album Version) ...
MMHMMRelient K20040.0
SOCBUJT12A6D4F9E1AWho I Am Hates Who I've
Been (mmhmm Album ...
MMHMMRelient K20040.0
SOBUHJR12A6D4FDC7CHere's To Life (Album
Version) ...
Everything Goes NumbStreetlight Manifesto20030.0
SONKBWG12A6D4FB91DGiving Up_ Giving In (LP
Version) ...
Keasbey NightsStreetlight Manifesto20060.0
SOSFAVU12A6D4FDC6AEverything Went Numb
(Album Version) ...
Everything Goes NumbStreetlight Manifesto20030.0
SORYDMW12A6D4FB923This One Goes Out To....
(LP Version) ...
Keasbey NightsStreetlight Manifesto20060.0
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title_lengthhow_old_was_icol1col2no_of_a
615600
718702
518501
614601
615601
10151001
514501
617602
514501
717702
\n", "[? rows x 11 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\n", "Rows: Unknown\n", "\n", "Data:\n", "+--------------------+--------------------------------+\n", "| song_id | title |\n", "+--------------------+--------------------------------+\n", "| SOQNUHJ12A6D4F9E19 | Let It All Out (Album Version) |\n", "| SOADZOM12A8C13E446 | The Blonde Lead The Blind ... |\n", "| SOKUTMT12A58A77CFB | Give Until There's Nothing... |\n", "| SOIONAH12A58A76FD1 | We Are The Few (Album Version) |\n", "| SOUUYHK12A6D4F9E16 | I So Hate Consequences (Al... |\n", "| SOCBUJT12A6D4F9E1A | Who I Am Hates Who I've Be... |\n", "| SOBUHJR12A6D4FDC7C | Here's To Life (Album Version) |\n", "| SONKBWG12A6D4FB91D | Giving Up_ Giving In (LP V... |\n", "| SOSFAVU12A6D4FDC6A | Everything Went Numb (Albu... |\n", "| SORYDMW12A6D4FB923 | This One Goes Out To.... (... 
|\n", "+--------------------+--------------------------------+\n", "+--------------------------------+-----------------------+------+-----------+--------------+\n", "| release | artist_name | year | my_rating | title_length |\n", "+--------------------------------+-----------------------+------+-----------+--------------+\n", "| MMHMM | Relient K | 2004 | 0.0 | 6 |\n", "| Somewhere In The Between | Streetlight Manifesto | 2007 | 0.0 | 7 |\n", "| Five Score And Seven Years Ago | Relient K | 2007 | 0.0 | 5 |\n", "| Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 | 6 |\n", "| MMHMM | Relient K | 2004 | 0.0 | 6 |\n", "| MMHMM | Relient K | 2004 | 0.0 | 10 |\n", "| Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 | 5 |\n", "| Keasbey Nights | Streetlight Manifesto | 2006 | 0.0 | 6 |\n", "| Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 | 5 |\n", "| Keasbey Nights | Streetlight Manifesto | 2006 | 0.0 | 7 |\n", "+--------------------------------+-----------------------+------+-----------+--------------+\n", "+---------------+------+------+---------+\n", "| how_old_was_i | col1 | col2 | no_of_a |\n", "+---------------+------+------+---------+\n", "| 15 | 6 | 0 | 0 |\n", "| 18 | 7 | 0 | 2 |\n", "| 18 | 5 | 0 | 1 |\n", "| 14 | 6 | 0 | 1 |\n", "| 15 | 6 | 0 | 1 |\n", "| 15 | 10 | 0 | 1 |\n", "| 14 | 5 | 0 | 1 |\n", "| 17 | 6 | 0 | 2 |\n", "| 14 | 5 | 0 | 1 |\n", "| 17 | 7 | 0 | 2 |\n", "+---------------+------+------+---------+\n", "[? rows x 11 columns]\n", "Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.\n", "You can use sf.materialize() to force materialization." 
] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# let's create two datasets which we can join\n", "ds1 = sdata[((sdata['artist_name'] == 'Relient K')\n", " | (sdata['artist_name'] == 'Streetlight Manifesto'))\n", " & (sdata['how_old_was_i'] >= 14) & (sdata['how_old_was_i'] <= 18)]\n", "ds1" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOMMLRO12A6D4F9FD1Must Have Done Something
Right ...
Must Have Done Something
Right ...
Relient K20070.0
SOCLCYG12A6D4FDC71Point/Counterpoint (Album
Version) ...
Everything Goes NumbStreetlight Manifesto20030.0
SOINPKF12A6D4FDC75A Better Place_ A Better
Time (Album Version) ...
Everything Goes NumbStreetlight Manifesto20030.0
SOMMLRO12A6D4F9FD1Must Have Done Something
Right ...
Must Have Done Something
Right ...
Relient K20070.0
SOMMLRO12A6D4F9FD1Must Have Done Something
Right ...
Must Have Done Something
Right ...
Relient K20070.0
SOMMLRO12A6D4F9FD1Must Have Done Something
Right ...
Must Have Done Something
Right ...
Relient K20070.0
SOPSQOS12A6D4F9E15High Of 75 (Album
Version) ...
MMHMMRelient K20040.0
SOSTGAF12A58A7B18BThe Best ThingFive Score And Seven
Years Ago ...
Relient K20070.0
SOUUYHK12A6D4F9E16I So Hate Consequences
(Album Version) ...
MMHMMRelient K20040.0
SOMMLRO12A6D4F9FD1Must Have Done Something
Right ...
Must Have Done Something
Right ...
Relient K20070.0
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title_lengthhow_old_was_icol1col2no_of_auser_idlisten_count
5185023f9ed694a79835c921ef6d94a
cd28f876c1d901e ...
3
314301956044d724390e40c8511b49e
5bf6bc28071de3a ...
1
814802956044d724390e40c8511b49e
5bf6bc28071de3a ...
1
518502354cfdb566f543bb5b810a4d8
959d974a30797fd ...
1
518502e4d4e50c99ed5b3dfe740fa1c
cbe6be41eeb4f35 ...
14
518502ed0f4979ffddc0010244638a3
b956510624dc6a9 ...
3
515500ed0f4979ffddc0010244638a3
b956510624dc6a9 ...
2
318301ed0f4979ffddc0010244638a3
b956510624dc6a9 ...
1
615601ed0f4979ffddc0010244638a3
b956510624dc6a9 ...
1
5185020e741cfe121bb619177be8d8a
135a2d3692d9c90 ...
4
\n", "[571 rows x 13 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\tuser_id\tstr\n", "\tlisten_count\tint\n", "\n", "Rows: 571\n", "\n", "Data:\n", "+--------------------+--------------------------------+\n", "| song_id | title |\n", "+--------------------+--------------------------------+\n", "| SOMMLRO12A6D4F9FD1 | Must Have Done Something Right |\n", "| SOCLCYG12A6D4FDC71 | Point/Counterpoint (Album ... |\n", "| SOINPKF12A6D4FDC75 | A Better Place_ A Better T... |\n", "| SOMMLRO12A6D4F9FD1 | Must Have Done Something Right |\n", "| SOMMLRO12A6D4F9FD1 | Must Have Done Something Right |\n", "| SOMMLRO12A6D4F9FD1 | Must Have Done Something Right |\n", "| SOPSQOS12A6D4F9E15 | High Of 75 (Album Version) |\n", "| SOSTGAF12A58A7B18B | The Best Thing |\n", "| SOUUYHK12A6D4F9E16 | I So Hate Consequences (Al... 
|\n", "| SOMMLRO12A6D4F9FD1 | Must Have Done Something Right |\n", "+--------------------+--------------------------------+\n", "+--------------------------------+-----------------------+------+-----------+--------------+\n", "| release | artist_name | year | my_rating | title_length |\n", "+--------------------------------+-----------------------+------+-----------+--------------+\n", "| Must Have Done Something Right | Relient K | 2007 | 0.0 | 5 |\n", "| Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 | 3 |\n", "| Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 | 8 |\n", "| Must Have Done Something Right | Relient K | 2007 | 0.0 | 5 |\n", "| Must Have Done Something Right | Relient K | 2007 | 0.0 | 5 |\n", "| Must Have Done Something Right | Relient K | 2007 | 0.0 | 5 |\n", "| MMHMM | Relient K | 2004 | 0.0 | 5 |\n", "| Five Score And Seven Years Ago | Relient K | 2007 | 0.0 | 3 |\n", "| MMHMM | Relient K | 2004 | 0.0 | 6 |\n", "| Must Have Done Something Right | Relient K | 2007 | 0.0 | 5 |\n", "+--------------------------------+-----------------------+------+-----------+--------------+\n", "+---------------+------+------+---------+-------------------------------+--------------+\n", "| how_old_was_i | col1 | col2 | no_of_a | user_id | listen_count |\n", "+---------------+------+------+---------+-------------------------------+--------------+\n", "| 18 | 5 | 0 | 2 | 3f9ed694a79835c921ef6d94ac... | 3 |\n", "| 14 | 3 | 0 | 1 | 956044d724390e40c8511b49e5... | 1 |\n", "| 14 | 8 | 0 | 2 | 956044d724390e40c8511b49e5... | 1 |\n", "| 18 | 5 | 0 | 2 | 354cfdb566f543bb5b810a4d89... | 1 |\n", "| 18 | 5 | 0 | 2 | e4d4e50c99ed5b3dfe740fa1cc... | 14 |\n", "| 18 | 5 | 0 | 2 | ed0f4979ffddc0010244638a3b... | 3 |\n", "| 15 | 5 | 0 | 0 | ed0f4979ffddc0010244638a3b... | 2 |\n", "| 18 | 3 | 0 | 1 | ed0f4979ffddc0010244638a3b... | 1 |\n", "| 15 | 6 | 0 | 1 | ed0f4979ffddc0010244638a3b... | 1 |\n", "| 18 | 5 | 0 | 2 | 0e741cfe121bb619177be8d8a1... 
| 4 |\n", "+---------------+------+------+---------+-------------------------------+--------------+\n", "[571 rows x 13 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Let's join ds1 with the 10000.txt dataset usage_data\n", "dsjoin = ds1.join(usage_data, 'song_id')\n", "dsjoin" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(64, 571)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# total rows in the ds1 and dsjoin datasets\n", "len(ds1), len(dsjoin)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(64, 7, 64, 571)" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ds1['song_id'].unique()), len(dsjoin['song_id'].unique()), len(ds1['song_id']), len(dsjoin['song_id'])" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idnum_unique_userstotal_listen_count
SOCVOVH12A6D4FB912107555
SOPSQOS12A6D4F9E1579207
SOMMLRO12A6D4F9FD1104331
SOUUYHK12A6D4F9E1679171
SOINPKF12A6D4FDC7563196
SOSTGAF12A58A7B18B72168
SOCLCYG12A6D4FDC7167188
\n", "[7 rows x 3 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\tnum_unique_users\tint\n", "\ttotal_listen_count\tint\n", "\n", "Rows: 7\n", "\n", "Data:\n", "+--------------------+------------------+--------------------+\n", "| song_id | num_unique_users | total_listen_count |\n", "+--------------------+------------------+--------------------+\n", "| SOCVOVH12A6D4FB912 | 107 | 555 |\n", "| SOPSQOS12A6D4F9E15 | 79 | 207 |\n", "| SOMMLRO12A6D4F9FD1 | 104 | 331 |\n", "| SOUUYHK12A6D4F9E16 | 79 | 171 |\n", "| SOINPKF12A6D4FDC75 | 63 | 196 |\n", "| SOSTGAF12A58A7B18B | 72 | 168 |\n", "| SOCLCYG12A6D4FDC71 | 67 | 188 |\n", "+--------------------+------------------+--------------------+\n", "[7 rows x 3 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# find the most popular songs from when I was between 14 and 18\n", "# num_unique_users uses COUNT_DISTINCT so that a user with several rows for the same song is counted once;\n", "# a plain COUNT would report the number of joined rows per song instead of the number of users\n", "most_popular = dsjoin.groupby(['song_id'], {'total_listen_count':gl.aggregate.SUM('listen_count'), \n", " 'num_unique_users':gl.aggregate.COUNT_DISTINCT('user_id')})\n", "most_popular" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idnum_unique_userstotal_listen_counttitlerelease
SOCVOVH12A6D4FB912107555Keasbey Nights (LP
Version) ...
Keasbey Nights
SOMMLRO12A6D4F9FD1104331Must Have Done Something
Right ...
Must Have Done Something
Right ...
SOPSQOS12A6D4F9E1579207High Of 75 (Album
Version) ...
MMHMM
SOINPKF12A6D4FDC7563196A Better Place_ A Better
Time (Album Version) ...
Everything Goes Numb
SOCLCYG12A6D4FDC7167188Point/Counterpoint (Album
Version) ...
Everything Goes Numb
SOUUYHK12A6D4F9E1679171I So Hate Consequences
(Album Version) ...
MMHMM
SOSTGAF12A58A7B18B72168The Best ThingFive Score And Seven
Years Ago ...
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
artist_nameyearmy_ratingtitle_lengthhow_old_was_icol1col2no_of_a
Streetlight Manifesto20060.0417403
Relient K20070.0518502
Relient K20040.0515500
Streetlight Manifesto20030.0814802
Streetlight Manifesto20030.0314301
Relient K20040.0615601
Relient K20070.0318301
\n", "[7 rows x 13 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\tnum_unique_users\tint\n", "\ttotal_listen_count\tint\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\n", "Rows: 7\n", "\n", "Data:\n", "+--------------------+------------------+--------------------+\n", "| song_id | num_unique_users | total_listen_count |\n", "+--------------------+------------------+--------------------+\n", "| SOCVOVH12A6D4FB912 | 107 | 555 |\n", "| SOMMLRO12A6D4F9FD1 | 104 | 331 |\n", "| SOPSQOS12A6D4F9E15 | 79 | 207 |\n", "| SOINPKF12A6D4FDC75 | 63 | 196 |\n", "| SOCLCYG12A6D4FDC71 | 67 | 188 |\n", "| SOUUYHK12A6D4F9E16 | 79 | 171 |\n", "| SOSTGAF12A58A7B18B | 72 | 168 |\n", "+--------------------+------------------+--------------------+\n", "+--------------------------------+--------------------------------+\n", "| title | release |\n", "+--------------------------------+--------------------------------+\n", "| Keasbey Nights (LP Version) | Keasbey Nights |\n", "| Must Have Done Something Right | Must Have Done Something Right |\n", "| High Of 75 (Album Version) | MMHMM |\n", "| A Better Place_ A Better T... | Everything Goes Numb |\n", "| Point/Counterpoint (Album ... | Everything Goes Numb |\n", "| I So Hate Consequences (Al... 
| MMHMM |\n", "| The Best Thing | Five Score And Seven Years Ago |\n", "+--------------------------------+--------------------------------+\n", "+-----------------------+------+-----------+--------------+---------------+------+\n", "| artist_name | year | my_rating | title_length | how_old_was_i | col1 |\n", "+-----------------------+------+-----------+--------------+---------------+------+\n", "| Streetlight Manifesto | 2006 | 0.0 | 4 | 17 | 4 |\n", "| Relient K | 2007 | 0.0 | 5 | 18 | 5 |\n", "| Relient K | 2004 | 0.0 | 5 | 15 | 5 |\n", "| Streetlight Manifesto | 2003 | 0.0 | 8 | 14 | 8 |\n", "| Streetlight Manifesto | 2003 | 0.0 | 3 | 14 | 3 |\n", "| Relient K | 2004 | 0.0 | 6 | 15 | 6 |\n", "| Relient K | 2007 | 0.0 | 3 | 18 | 3 |\n", "+-----------------------+------+-----------+--------------+---------------+------+\n", "+------+---------+\n", "| col2 | no_of_a |\n", "+------+---------+\n", "| 0 | 3 |\n", "| 0 | 2 |\n", "| 0 | 0 |\n", "| 0 | 2 |\n", "| 0 | 1 |\n", "| 0 | 1 |\n", "| 0 | 1 |\n", "+------+---------+\n", "[7 rows x 13 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# to get artist name we have to join this data\n", "most_popular.join(sdata, 'song_id').topk('total_listen_count',k=20)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# let's append a row with a very high listen count and check whether it shows up in the result above\n", "me = gl.SFrame({'user_id':['evan'],'song_id':['SOSFAVU12A6D4FDC6A'],'listen_count':[4000]})\n", "# adding this data to usage data\n", "usage_data = usage_data.append(me)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idnum_unique_userstotal_listen_counttitlerelease
SOSFAVU12A6D4FDC6A14000Everything Went Numb
(Album Version) ...
Everything Goes Numb
SOCVOVH12A6D4FB912107555Keasbey Nights (LP
Version) ...
Keasbey Nights
SOMMLRO12A6D4F9FD1104331Must Have Done Something
Right ...
Must Have Done Something
Right ...
SOPSQOS12A6D4F9E1579207High Of 75 (Album
Version) ...
MMHMM
SOINPKF12A6D4FDC7563196A Better Place_ A Better
Time (Album Version) ...
Everything Goes Numb
SOCLCYG12A6D4FDC7167188Point/Counterpoint (Album
Version) ...
Everything Goes Numb
SOUUYHK12A6D4F9E1679171I So Hate Consequences
(Album Version) ...
MMHMM
SOSTGAF12A58A7B18B72168The Best ThingFive Score And Seven
Years Ago ...
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
artist_nameyearmy_ratingtitle_lengthhow_old_was_icol1col2no_of_a
Streetlight Manifesto20030.0514501
Streetlight Manifesto20060.0417403
Relient K20070.0518502
Relient K20040.0515500
Streetlight Manifesto20030.0814802
Streetlight Manifesto20030.0314301
Relient K20040.0615601
Relient K20070.0318301
\n", "[8 rows x 13 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\tnum_unique_users\tint\n", "\ttotal_listen_count\tint\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\n", "Rows: 8\n", "\n", "Data:\n", "+--------------------+------------------+--------------------+\n", "| song_id | num_unique_users | total_listen_count |\n", "+--------------------+------------------+--------------------+\n", "| SOSFAVU12A6D4FDC6A | 1 | 4000 |\n", "| SOCVOVH12A6D4FB912 | 107 | 555 |\n", "| SOMMLRO12A6D4F9FD1 | 104 | 331 |\n", "| SOPSQOS12A6D4F9E15 | 79 | 207 |\n", "| SOINPKF12A6D4FDC75 | 63 | 196 |\n", "| SOCLCYG12A6D4FDC71 | 67 | 188 |\n", "| SOUUYHK12A6D4F9E16 | 79 | 171 |\n", "| SOSTGAF12A58A7B18B | 72 | 168 |\n", "+--------------------+------------------+--------------------+\n", "+--------------------------------+--------------------------------+\n", "| title | release |\n", "+--------------------------------+--------------------------------+\n", "| Everything Went Numb (Albu... | Everything Goes Numb |\n", "| Keasbey Nights (LP Version) | Keasbey Nights |\n", "| Must Have Done Something Right | Must Have Done Something Right |\n", "| High Of 75 (Album Version) | MMHMM |\n", "| A Better Place_ A Better T... | Everything Goes Numb |\n", "| Point/Counterpoint (Album ... | Everything Goes Numb |\n", "| I So Hate Consequences (Al... 
| MMHMM |\n", "| The Best Thing | Five Score And Seven Years Ago |\n", "+--------------------------------+--------------------------------+\n", "+-----------------------+------+-----------+--------------+---------------+------+\n", "| artist_name | year | my_rating | title_length | how_old_was_i | col1 |\n", "+-----------------------+------+-----------+--------------+---------------+------+\n", "| Streetlight Manifesto | 2003 | 0.0 | 5 | 14 | 5 |\n", "| Streetlight Manifesto | 2006 | 0.0 | 4 | 17 | 4 |\n", "| Relient K | 2007 | 0.0 | 5 | 18 | 5 |\n", "| Relient K | 2004 | 0.0 | 5 | 15 | 5 |\n", "| Streetlight Manifesto | 2003 | 0.0 | 8 | 14 | 8 |\n", "| Streetlight Manifesto | 2003 | 0.0 | 3 | 14 | 3 |\n", "| Relient K | 2004 | 0.0 | 6 | 15 | 6 |\n", "| Relient K | 2007 | 0.0 | 3 | 18 | 3 |\n", "+-----------------------+------+-----------+--------------+---------------+------+\n", "+------+---------+\n", "| col2 | no_of_a |\n", "+------+---------+\n", "| 0 | 1 |\n", "| 0 | 3 |\n", "| 0 | 2 |\n", "| 0 | 0 |\n", "| 0 | 2 |\n", "| 0 | 1 |\n", "| 0 | 1 |\n", "| 0 | 1 |\n", "+------+---------+\n", "[8 rows x 13 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# repeating the above join n group by statement\n", "dsjoin = ds1.join(usage_data, 'song_id')\n", "most_popular = dsjoin.groupby(['song_id'], {'total_listen_count':gl.aggregate.SUM('listen_count'), \n", " 'num_unique_users':gl.aggregate.COUNT('user_id')})\n", "most_popular.join(sdata, 'song_id').topk('total_listen_count',k=20)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Splitting and Sampling\n", "Let's check how we can randomly split the data for testing.\n" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(800135, 199865)" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Randomly split data rows into two 
subsets\n", "first_set, second_set = sdata.random_split(0.8, seed = 1)\n", "first_set.num_rows(), second_set.num_rows()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to split on a predicate though, you'll have to do that manually." ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(75581, 439995)" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "songs_before = sdata[sdata['how_old_was_i'] < 0]\n", "songs_after = sdata[sdata['how_old_was_i'] >= 0]\n", "songs_before.num_rows(), songs_after.num_rows()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "399454" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# generating sample data\n", "sample = sdata.sample(0.4)\n", "sample.num_rows()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### SArray" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype: int\n", "Rows: 3\n", "[1L, 2L, 3L]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr = gl.SArray([1,2,3])\n", "arr" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype: int\n", "Rows: 3\n", "[2L, 4L, 6L]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr2 = 2*arr\n", "arr2" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype: int\n", "Rows: 3\n", "[3L, 6L, 9L]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# add\n", "arr + arr2" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": 
false }, "outputs": [ { "data": { "text/plain": [ "dtype: int\n", "Rows: 3\n", "[2L, 8L, 18L]" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# multiply\n", "arr * arr2" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype: float\n", "Rows: 3\n", "[2.0, 2.0, 2.0]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# divide\n", "arr2 / arr" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Whoa that's long!\n", "Whoa that's long!\n", "Whoa that's long!\n", "Whoa that's long!\n" ] } ], "source": [ "# iterating with SFrame\n", "for i in sdata:\n", " if i['title_length'] >= 45:\n", " print \"Whoa that's long!\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using apply function on SFrame" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
song_idtitlereleaseartist_nameyearmy_rating
SOQMMHC12AB0180CB8Silent NightMonster Ballads X-MasFaster Pussy cat20030.0
SOVFVAK12A8C1350D9Tanssi vaanKarkuteilläKarkkiautomaatti19950.0
SOGTUKN12AB017F4F1No One Could EverButterHudson Mohawke20060.0
SOBNYVR12A8C13558CSi Vos QuerésDe CuloYerba Brava20030.0
SOHSBXH12A8C13B0DFTangle Of AspensRene Ablaze Presents
Winter Sessions ...
Der MysticNone0.0
SOZVAPQ12A8C13B63CSymphony No. 1 G minor
\"Sinfonie ...
Berwald: Symphonies Nos.
1/2/3/4 ...
David MontgomeryNone0.0
SOQVRHI12A6D4FB2D7We Have Got LoveStrictly The Best Vol. 34Sasha / TurbulenceNone0.0
SOEYRFT12AB018936C2 Da Beat Ch'yallDa BombKris Kross19930.0
SOPMIYT12A6D4F851EGoodbyeDanny BoyJoseph LockeNone0.0
SOJCFMH12A8C13B0C2Mama_ mama can't you see
? ...
March to cadence with the
US marines ...
The Sun Harbor's Chorus-
Documentary Recordings ...
None0.0
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title_lengthhow_old_was_icol1col2no_of_atitle_artist_length
21420528
2620827
41740131
31430325
3None30226
9None90378
4None40334
4440427
1None10119
6None601072
\n", "[1000000 rows x 12 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tsong_id\tstr\n", "\ttitle\tstr\n", "\trelease\tstr\n", "\tartist_name\tstr\n", "\tyear\tint\n", "\tmy_rating\tfloat\n", "\ttitle_length\tint\n", "\thow_old_was_i\tint\n", "\tcol1\tint\n", "\tcol2\tint\n", "\tno_of_a\tint\n", "\ttitle_artist_length\tint\n", "\n", "Rows: 1000000\n", "\n", "Data:\n", "+--------------------+-------------------------------+\n", "| song_id | title |\n", "+--------------------+-------------------------------+\n", "| SOQMMHC12AB0180CB8 | Silent Night |\n", "| SOVFVAK12A8C1350D9 | Tanssi vaan |\n", "| SOGTUKN12AB017F4F1 | No One Could Ever |\n", "| SOBNYVR12A8C13558C | Si Vos Querés |\n", "| SOHSBXH12A8C13B0DF | Tangle Of Aspens |\n", "| SOZVAPQ12A8C13B63C | Symphony No. 1 G minor \"Si... |\n", "| SOQVRHI12A6D4FB2D7 | We Have Got Love |\n", "| SOEYRFT12AB018936C | 2 Da Beat Ch'yall |\n", "| SOPMIYT12A6D4F851E | Goodbye |\n", "| SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? |\n", "+--------------------+-------------------------------+\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| release | artist_name | year | my_rating |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "| Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 |\n", "| Karkuteillä | Karkkiautomaatti | 1995 | 0.0 |\n", "| Butter | Hudson Mohawke | 2006 | 0.0 |\n", "| De Culo | Yerba Brava | 2003 | 0.0 |\n", "| Rene Ablaze Presents Winte... | Der Mystic | None | 0.0 |\n", "| Berwald: Symphonies Nos. 1... | David Montgomery | None | 0.0 |\n", "| Strictly The Best Vol. 34 | Sasha / Turbulence | None | 0.0 |\n", "| Da Bomb | Kris Kross | 1993 | 0.0 |\n", "| Danny Boy | Joseph Locke | None | 0.0 |\n", "| March to cadence with the ... | The Sun Harbor's Chorus-Do... 
| None | 0.0 |\n", "+-------------------------------+-------------------------------+------+-----------+\n", "+--------------+---------------+------+------+---------+---------------------+\n", "| title_length | how_old_was_i | col1 | col2 | no_of_a | title_artist_length |\n", "+--------------+---------------+------+------+---------+---------------------+\n", "| 2 | 14 | 2 | 0 | 5 | 28 |\n", "| 2 | 6 | 2 | 0 | 8 | 27 |\n", "| 4 | 17 | 4 | 0 | 1 | 31 |\n", "| 3 | 14 | 3 | 0 | 3 | 25 |\n", "| 3 | None | 3 | 0 | 2 | 26 |\n", "| 9 | None | 9 | 0 | 3 | 78 |\n", "| 4 | None | 4 | 0 | 3 | 34 |\n", "| 4 | 4 | 4 | 0 | 4 | 27 |\n", "| 1 | None | 1 | 0 | 1 | 19 |\n", "| 6 | None | 6 | 0 | 10 | 72 |\n", "+--------------+---------------+------+------+---------+---------------------+\n", "[1000000 rows x 12 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sdata['title_artist_length'] = sdata['title','artist_name'].apply(lambda row: sum([len(col) for col in row.values()]))\n", "sdata" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Saving Our Work" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# save as csv\n", "sdata.save('sdata_new.csv', format='csv')" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sdata.save('sdata_new')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }