{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Generate data for Huber regression.\n",
"srand(1);\n",
"n = 300;\n",
"SAMPLES = int(1.5*n);\n",
"beta_true = 5*randn(n);\n",
"X = randn(n, SAMPLES);\n",
"Y = zeros(SAMPLES);\n",
"v = randn(SAMPLES);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Generate data for different values of p.\n",
"# Solve the resulting problems.\n",
"# WARNING this script takes a few minutes to run.\n",
"using Convex, SCS, Distributions\n",
"set_default_solver(SCSSolver(verbose=0));\n",
"TESTS = 50;\n",
"lsq_data = zeros(TESTS);\n",
"huber_data = zeros(TESTS);\n",
"prescient_data = zeros(TESTS);\n",
"p_vals = linspace(0,0.15, TESTS);\n",
"for i=1:length(p_vals)\n",
" p = p_vals[i];\n",
" # Generate the sign changes.\n",
" factor = float(2 * rand(Binomial(1, 1-p), SAMPLES) - 1);\n",
" Y = factor .* X' * beta_true + v;\n",
" \n",
" # Form and solve a standard regression problem.\n",
" beta = Variable(n);\n",
" fit = norm(beta - beta_true) / norm(beta_true);\n",
" cost = norm(X' * beta - Y);\n",
" prob = minimize(cost);\n",
" solve!(prob);\n",
" lsq_data[i] = evaluate(fit);\n",
" \n",
" # Form and solve a prescient regression problem,\n",
" # i.e., where the sign changes are known.\n",
" cost = norm(factor .* (X'*beta) - Y);\n",
" solve!(minimize(cost))\n",
" prescient_data[i] = evaluate(fit);\n",
" \n",
" # Form and solve the Huber regression problem.\n",
" cost = sum(huber(X' * beta - Y, 1));\n",
" solve!(minimize(cost))\n",
" huber_data[i] = evaluate(fit);\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/html": [
"\n",
"\n"
],
"text/plain": [
"Plot(...)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"using Gadfly, DataFrames\n",
"df = DataFrame(x=p_vals, y=huber_data, label=\"Huber\");\n",
"df = vcat(df, DataFrame(x=p_vals, y=prescient_data, label=\"Prescient\"));\n",
"df = vcat(df, DataFrame(x=p_vals, y=lsq_data, label=\"Least squares\"));\n",
"plot(df, x=\"x\", y=\"y\", color=\"label\", Geom.line, Guide.XLabel(\"p\"), Guide.YLabel(\"Fit\"))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/html": [
"\n",
"\n"
],
"text/plain": [
"Plot(...)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Plot the relative reconstruction error for Huber and prescient regression,\n",
"# zooming in on smaller values of p.\n",
"indices = find(p_vals .<= 0.08);\n",
"df = DataFrame(x=p_vals[indices], y=huber_data[indices], label=\"Huber\");\n",
"df = vcat(df, DataFrame(x=p_vals[indices], y=prescient_data[indices], label=\"Prescient\"));\n",
"plot(df, x=\"x\", y=\"y\", color=\"label\", Geom.line, Guide.XLabel(\"p\"), Guide.YLabel(\"Fit\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 0.3.9",
"language": "julia",
"name": "julia-0.3"
},
"language_info": {
"name": "julia",
"version": "0.3.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}