{
"metadata": {
"language": "Julia",
"name": "",
"signature": "sha256:762d036dcb86cb29b260dfd4bcd31a252e05df594785a41ce90a92d205acb9c2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"using DataFrames\n",
"using RDatasets\n",
"using Convex\n",
"using SCS"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load the iris table; the task is to tell versicolor apart from the other species\n",
"# using four measurements: sepal length/width and petal length/width.\n",
"iris = dataset(\"datasets\", \"iris\")\n",
"## response column: +1.0 when Species is \"versicolor\", otherwise -1.0\n",
"iris[:Y] = [(sp == \"versicolor\") ? 1.0 : -1.0 for sp in iris[:Species]]\n",
"Y = array(iris[:Y])\n",
"## design matrix: a leading column of ones (the offset term), then one column per feature\n",
"X = hcat(ones(size(iris, 1)),\n",
"         iris[:SepalLength], iris[:SepalWidth],\n",
"         iris[:PetalLength], iris[:PetalWidth]);"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fit the logistic regression as a convex minimization problem.\n",
"# size(X) yields (rows, columns); beta gets one entry per column of X.\n",
"n, p = size(X)\n",
"beta = Variable(p)\n",
"# objective: logistic loss evaluated at -Y .* (X*beta)\n",
"loss = logisticloss(-Y.*(X*beta))\n",
"problem = minimize(loss)\n",
"\n",
"# hand the problem to SCS, constructed with verbose=false\n",
"solver = SCSSolver(verbose=false)\n",
"solve!(problem, solver)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# let's see how well the model fits\n",
"using Gadfly\n",
"# linear predictor for every observation, computed once and reused below\n",
"scores = vec(X*beta.value)\n",
"# ordering of the observations by increasing score\n",
"perm = sortperm(scores)\n",
"# fitted probabilities via the logistic function 1/(1+exp(-s)); written out inline\n",
"# because no cell in this notebook defines or explicitly imports `logistic`\n",
"fitted = Float64[1 / (1 + exp(-s)) for s in scores[perm]]\n",
"set_default_plot_size(25cm, 12cm)\n",
"# points: observed labels mapped from {-1,+1} to {0,1}; line: fitted probabilities\n",
"plot(layer(x=1:n,y=(Y[perm]+1)/2,Geom.point),layer(x=1:n,y=fitted,Geom.line))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"svg": [
"\n",
"\n"
],
"text": [
"Plot(...)"
]
}
],
"prompt_number": 5
}
],
"metadata": {}
}
]
}