{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Synthetic data for an SVM classifier with L1 regularization.\n",
"srand(1);        # fixed seed so the random draws below are repeatable\n",
"n = 20;          # number of features (columns of X)\n",
"m = 1000;        # number of training samples (rows of X)\n",
"TEST = m;        # number of test samples (rows of X_test)\n",
"DENSITY = 0.2;   # fraction of the true coefficients that stay nonzero\n",
"beta_true = randn(n,1);\n",
"# Pick a random (1-DENSITY) share of the coefficients and zero them in one\n",
"# indexed assignment, which makes beta_true sparse.\n",
"idxs = randperm(n)[1:int((1-DENSITY)*n)];\n",
"beta_true[idxs] = 0;\n",
"offset = 0;\n",
"sigma = 45;      # standard deviation of the Gaussian noise added to the score\n",
"X = 5 * randn(m, n);\n",
"# Training labels are the sign of the noisy linear score.\n",
"Y = sign(X * beta_true + offset + sigma * randn(m,1));\n",
"X_test = 5 * randn(TEST, n);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Form SVM with L1 regularization problem.\n",
"using Convex, SCS\n",
"set_default_solver(SCSSolver(verbose=0));\n",
"beta = Variable(n);\n",
"v = Variable();\n",
"# Hinge loss summed over the training samples: sum_i max(0, 1 - Y_i * (x_i * beta - v)).\n",
"loss = sum(pos(1 - Y .* (X*beta - v)));\n",
"# L1 norm of beta; its weight lambda is swept in the loop below.\n",
"reg = norm(beta, 1);\n",
"\n",
"# Compute a trade-off curve and record train and test error.\n",
"TRIALS = 100;\n",
"train_error = zeros(TRIALS);\n",
"test_error = zeros(TRIALS);\n",
"# TRIALS log-spaced regularization weights from 1e-2 to 1e0.\n",
"lambda_vals = logspace(-2, 0, TRIALS);\n",
"# Column i holds the fitted beta for lambda_vals[i].\n",
"beta_vals = zeros(length(beta), TRIALS);\n",
"# Reference labels come from the noiseless model sign(X*beta_true + offset).\n",
"# They do not depend on lambda, so they are computed once rather than per trial.\n",
"train_labels = sign(X*beta_true + offset);\n",
"test_labels = sign(X_test*beta_true + offset);\n",
"for i = 1:TRIALS\n",
"    lambda = lambda_vals[i];\n",
"    problem = minimize(loss/m + lambda*reg);\n",
"    solve!(problem);\n",
"    # Error = fraction of samples whose predicted sign differs from the reference label.\n",
"    train_error[i] = sum(float(train_labels .!= sign(evaluate(X*beta - v))))/m;\n",
"    test_error[i] = sum(float(test_labels .!= sign(evaluate(X_test*beta - v))))/TEST;\n",
"    beta_vals[:, i] = evaluate(beta);\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/html": [
"\n",
"\n"
],
"text/plain": [
"Plot(...)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Plot the train and test error over the trade-off curve.\n",
"using Gadfly, DataFrames\n",
"# Long-format table: one row per (lambda, error) pair, tagged with the curve\n",
"# it belongs to so the plot can colour by that tag.\n",
"df = vcat(\n",
"    DataFrame(λ=lambda_vals, errors=train_error, label=\"Train error\"),\n",
"    DataFrame(λ=lambda_vals, errors=test_error, label=\"Test error\"));\n",
"\n",
"# One line per label, with both axes on a log10 scale.\n",
"plot(df, x=\"λ\", y=\"errors\", color=\"label\", Geom.line,\n",
"     Scale.x_log10, Scale.y_log10)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/html": [
"\n",
"\n"
],
"text/plain": [
"Plot(...)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Plot the regularization path for beta.\n",
"# Build one (lambda, coefficient) table per row of beta_vals, labelled\n",
"# beta1 through beta<n>, then stack them so each coefficient is its own line.\n",
"paths = [DataFrame(λ=lambda_vals, betas=vec(beta_vals[k,:]), label=string(\"beta\", k)) for k=1:n];\n",
"df = vcat(paths...);\n",
"plot(df, x=\"λ\", y=\"betas\", color=\"label\", Geom.line, Scale.x_log10)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 0.3.9",
"language": "julia",
"name": "julia-0.3"
},
"language_info": {
"name": "julia",
"version": "0.3.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}