{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Generate data for Huber regression.\n", "srand(1);\n", "n = 300;\n", "SAMPLES = int(1.5*n);\n", "beta_true = 5*randn(n);\n", "X = randn(n, SAMPLES);\n", "Y = zeros(SAMPLES);\n", "v = randn(SAMPLES);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Generate data for different values of p.\n", "# Solve the resulting problems.\n", "# WARNING this script takes a few minutes to run.\n", "using Convex, SCS, Distributions\n", "set_default_solver(SCSSolver(verbose=0));\n", "TESTS = 50;\n", "lsq_data = zeros(TESTS);\n", "huber_data = zeros(TESTS);\n", "prescient_data = zeros(TESTS);\n", "p_vals = linspace(0,0.15, TESTS);\n", "for i=1:length(p_vals)\n", " p = p_vals[i];\n", " # Generate the sign changes.\n", " factor = float(2 * rand(Binomial(1, 1-p), SAMPLES) - 1);\n", " Y = factor .* X' * beta_true + v;\n", " \n", " # Form and solve a standard regression problem.\n", " beta = Variable(n);\n", " fit = norm(beta - beta_true) / norm(beta_true);\n", " cost = norm(X' * beta - Y);\n", " prob = minimize(cost);\n", " solve!(prob);\n", " lsq_data[i] = evaluate(fit);\n", " \n", " # Form and solve a prescient regression problem,\n", " # i.e., where the sign changes are known.\n", " cost = norm(factor .* (X'*beta) - Y);\n", " solve!(minimize(cost))\n", " prescient_data[i] = evaluate(fit);\n", " \n", " # Form and solve the Huber regression problem.\n", " cost = sum(huber(X' * beta - Y, 1));\n", " solve!(minimize(cost))\n", " huber_data[i] = evaluate(fit);\n", "end" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n" ], "text/html": [ "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Gadfly, DataFrames\n", "df = DataFrame(x=p_vals, y=huber_data, label=\"Huber\");\n", "df = vcat(df, DataFrame(x=p_vals, y=prescient_data, label=\"Prescient\"));\n", "df = vcat(df, DataFrame(x=p_vals, y=lsq_data, label=\"Least squares\"));\n", "plot(df, x=\"x\", y=\"y\", color=\"label\", Geom.line, Guide.XLabel(\"p\"), Guide.YLabel(\"Fit\"))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n" ], "text/html": [ "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Plot the relative reconstruction error for Huber and prescient regression,\n", "# zooming in on smaller values of p.\n", "indices = find(p_vals .<= 0.08);\n", "df = DataFrame(x=p_vals[indices], y=huber_data[indices], label=\"Huber\");\n", "df = vcat(df, DataFrame(x=p_vals[indices], y=prescient_data[indices], label=\"Prescient\"));\n", "plot(df, x=\"x\", y=\"y\", color=\"label\", Geom.line, Guide.XLabel(\"p\"), Guide.YLabel(\"Fit\"))" ] } ], "metadata": { "kernelspec": { "display_name": "Julia 0.3.9", "language": "julia", "name": "julia-0.3" }, "language_info": { "name": "julia", "version": "0.3.9" } }, "nbformat": 4, "nbformat_minor": 0 }