{ "metadata": { "name": "", "signature": "sha256:7065ad060aa986ac339e0718759ff240db127f35ad76fd918d73a13393776909" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "
SELECT *,dered_u - mag_u AS diff_u, dered_g - mag_g AS diff_g, dered_r - mag_r AS diff_g, dered_i - mag_i AS diff_i, dered_z - mag_z AS diff_z from\n", "(SELECT top 1000\n", "objid, ra, dec, dered_u,dered_g,dered_r,dered_i,dered_z,psfmag_u-extinction_u AS mag_u,\n", "psfmag_g-extinction_g AS mag_g, psfmag_r-extinction_r AS mag_r, psfmag_i-extinction_i AS mag_i,psfmag_z-extinction_z AS mag_z,z AS spec_z,dered_u - dered_g AS u_g_color, \n", "dered_g - dered_r AS g_r_color,dered_r - dered_i AS r_i_color,dered_i - dered_z AS i_z_color,class\n", "FROM SpecPhoto \n", "WHERE \n", " (class = 'QSO')\n", " ) as sp\n", "\n", " \n", "Saving this and others like it as a `csv`, we can then start to build our data set for classification/regression.\n", "\n", "Note: the query aliases `dered_r - mag_r` as `diff_g` a second time (presumably `diff_r` was intended); in the downloaded data that column shows up as `diff_g1`." ] }, { "cell_type": "code", "collapsed": false, "input": [ "## get the data locally ... I put this on a gist\n", "!curl -k -O https://gist.githubusercontent.com/anonymous/53781fe86383c435ff10/raw/4cc80a638e8e083775caec3005ae2feaf92b8d5b/qso10000.csv\n", "!curl -k -O https://gist.githubusercontent.com/anonymous/2984cf01a2485afd2c3e/raw/964d4f52c989428628d42eb6faad5e212e79b665/star1000.csv\n", "!curl -k -O https://gist.githubusercontent.com/anonymous/2984cf01a2485afd2c3e/raw/335cd1953e72f6c7cafa9ebb81b43c47cb757a9d/galaxy1000.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\r\n", " Dload Upload Total Spent Left Speed\r\n", "\r", " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 31763 0 31763 0 0 52127 0 --:--:-- --:--:-- --:--:-- 53653" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 434k 0 434k 0 0 268k 0 --:--:-- 0:00:01 --:--:-- 271k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 513k 0 513k 0 0 198k 0 --:--:-- 0:00:02 --:--:-- 199k" ] }, { "output_type": "stream", "stream": "stdout", 
"text": [ "\r", "100 633k 0 633k 0 0 178k 0 --:--:-- 0:00:03 --:--:-- 178k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 814k 0 814k 0 0 178k 0 --:--:-- 0:00:04 --:--:-- 179k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 958k 0 958k 0 0 172k 0 --:--:-- 0:00:05 --:--:-- 187k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 1086k 0 1086k 0 0 164k 0 --:--:-- 0:00:06 --:--:-- 130k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 1267k 0 1267k 0 0 167k 0 --:--:-- 0:00:07 --:--:-- 150k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 1505k 0 1505k 0 0 175k 0 --:--:-- 0:00:08 --:--:-- 173k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 1939k 0 1939k 0 0 202k 0 --:--:-- 0:00:09 --:--:-- 223k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 2378k 0 2378k 0 0 235k 0 --:--:-- 0:00:10 --:--:-- 311k\r\n" ] } ], "prompt_number": 61 }, { "cell_type": "code", "collapsed": false, "input": [ "# For pretty plotting\n", "!pip install --upgrade seaborn" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Requirement already up-to-date: seaborn in /Users/jbloom/anaconda/lib/python2.7/site-packages\r\n", "Cleaning up...\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "pd.set_option('display.max_columns', None)\n", "%pylab inline\n", "import seaborn as sns\n", "sns.set()\n", "import copy" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.read_csv(\"qso10000.csv\",index_col=0).head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | ra | \n", "dec | \n", "dered_u | \n", "dered_g | \n", "dered_r | \n", "dered_i | \n", "dered_z | \n", "mag_u | \n", "mag_g | \n", "mag_r | \n", "mag_i | \n", "mag_z | \n", "spec_z | \n", "u_g_color | \n", "g_r_color | \n", "r_i_color | \n", "i_z_color | \n", "class | \n", "diff_u | \n", "diff_g | \n", "diff_g1 | \n", "diff_i | \n", "diff_z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
objid | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1237648720142532813 | \n", "146.90229 | \n", "-0.984913 | \n", "19.64289 | \n", "19.31131 | \n", "19.25328 | \n", "19.15353 | \n", "19.13345 | \n", "19.71604 | \n", "19.37595 | \n", "19.32818 | \n", "19.24847 | \n", "19.21259 | \n", "0.652417 | \n", "0.331583 | \n", "0.058027 | \n", "0.099751 | \n", "0.020077 | \n", "QSO | \n", "-0.073151 | \n", "-0.064648 | \n", "-0.074903 | \n", "-0.094942 | \n", "-0.079136 | \n", "
1237658425156829371 | \n", "142.45853 | \n", "6.646406 | \n", "19.39569 | \n", "19.34811 | \n", "19.16626 | \n", "18.93152 | \n", "19.06013 | \n", "19.40327 | \n", "19.36566 | \n", "19.18335 | \n", "18.94222 | \n", "19.08077 | \n", "1.537123 | \n", "0.047575 | \n", "0.181847 | \n", "0.234743 | \n", "-0.128612 | \n", "QSO | \n", "-0.007589 | \n", "-0.017550 | \n", "-0.017090 | \n", "-0.010700 | \n", "-0.020636 | \n", "
1237660413189095710 | \n", "143.15770 | \n", "8.175363 | \n", "19.10362 | \n", "18.88904 | \n", "18.70672 | \n", "18.58508 | \n", "18.61328 | \n", "19.11102 | \n", "18.88857 | \n", "18.70458 | \n", "18.57886 | \n", "18.62583 | \n", "1.467101 | \n", "0.214582 | \n", "0.182318 | \n", "0.121645 | \n", "-0.028202 | \n", "QSO | \n", "-0.007397 | \n", "0.000473 | \n", "0.002148 | \n", "0.006218 | \n", "-0.012548 | \n", "
1237660412651962520 | \n", "142.49264 | \n", "7.800945 | \n", "19.88820 | \n", "19.75146 | \n", "19.52941 | \n", "19.65000 | \n", "19.52470 | \n", "19.88709 | \n", "19.75292 | \n", "19.53512 | \n", "19.67052 | \n", "19.50256 | \n", "1.014217 | \n", "0.136745 | \n", "0.222052 | \n", "-0.120590 | \n", "0.125301 | \n", "QSO | \n", "0.001118 | \n", "-0.001457 | \n", "-0.005716 | \n", "-0.020527 | \n", "0.022139 | \n", "
1237658493336944662 | \n", "142.64367 | \n", "7.917698 | \n", "18.45897 | \n", "18.40651 | \n", "18.15901 | \n", "17.77130 | \n", "17.75986 | \n", "18.55725 | \n", "18.55002 | \n", "18.40316 | \n", "18.01008 | \n", "18.03100 | \n", "0.215603 | \n", "0.052462 | \n", "0.247498 | \n", "0.387709 | \n", "0.011444 | \n", "QSO | \n", "-0.098282 | \n", "-0.143515 | \n", "-0.244150 | \n", "-0.238779 | \n", "-0.271137 | \n", "
5 rows \u00d7 23 columns
\n", "\n", " | dered_r | \n", "u_g_color | \n", "g_r_color | \n", "r_i_color | \n", "i_z_color | \n", "diff_u | \n", "diff_g1 | \n", "diff_i | \n", "diff_z | \n", "
---|---|---|---|---|---|---|---|---|---|
objid | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1237648720142532813 | \n", "19.25328 | \n", "0.331583 | \n", "0.058027 | \n", "0.099751 | \n", "0.020077 | \n", "-0.073151 | \n", "-0.074903 | \n", "-0.094942 | \n", "-0.079136 | \n", "
1237658425156829371 | \n", "19.16626 | \n", "0.047575 | \n", "0.181847 | \n", "0.234743 | \n", "-0.128612 | \n", "-0.007589 | \n", "-0.017090 | \n", "-0.010700 | \n", "-0.020636 | \n", "
1237660413189095710 | \n", "18.70672 | \n", "0.214582 | \n", "0.182318 | \n", "0.121645 | \n", "-0.028202 | \n", "-0.007397 | \n", "0.002148 | \n", "0.006218 | \n", "-0.012548 | \n", "
1237660412651962520 | \n", "19.52941 | \n", "0.136745 | \n", "0.222052 | \n", "-0.120590 | \n", "0.125301 | \n", "0.001118 | \n", "-0.005716 | \n", "-0.020527 | \n", "0.022139 | \n", "
1237658493336944662 | \n", "18.15901 | \n", "0.052462 | \n", "0.247498 | \n", "0.387709 | \n", "0.011444 | \n", "-0.098282 | \n", "-0.244150 | \n", "-0.238779 | \n", "-0.271137 | \n", "
5 rows \u00d7 9 columns
\n", "\n", " | dered_r | \n", "u_g_color | \n", "g_r_color | \n", "r_i_color | \n", "i_z_color | \n", "diff_u | \n", "diff_g1 | \n", "diff_i | \n", "diff_z | \n", "
---|---|---|---|---|---|---|---|---|---|
objid | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1237657775542632759 | \n", "15.42325 | \n", "1.999353 | \n", "0.970126 | \n", "0.435975 | \n", "0.373470 | \n", "-1.944487 | \n", "-1.971534 | \n", "-2.052320 | \n", "-1.971382 | \n", "
1237657775542698090 | \n", "17.51366 | \n", "2.212025 | \n", "0.965242 | \n", "0.410664 | \n", "0.371384 | \n", "-0.778788 | \n", "-0.944075 | \n", "-0.895832 | \n", "-0.830559 | \n", "
1237657775542698177 | \n", "17.15747 | \n", "1.190033 | \n", "0.332136 | \n", "0.252352 | \n", "0.070980 | \n", "-2.391565 | \n", "-2.977261 | \n", "-2.889906 | \n", "-2.671612 | \n", "
1237657630586634463 | \n", "17.19312 | \n", "1.179663 | \n", "0.678915 | \n", "0.394419 | \n", "0.272171 | \n", "-1.563450 | \n", "-1.913368 | \n", "-1.791895 | \n", "-1.615683 | \n", "
1237657630049698007 | \n", "17.20485 | \n", "1.925320 | \n", "1.126934 | \n", "0.477961 | \n", "0.334377 | \n", "-1.211906 | \n", "-1.377165 | \n", "-1.402037 | \n", "-1.218332 | \n", "
5 rows \u00d7 9 columns
\n", "