{
 "metadata": {
  "language": "Julia",
  "name": "",
  "signature": "sha256:40fa740a8ac5f2466bdc9d3934de4a44e12309605413039aa93e7b01f7e72c9e"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "using DataFrames, Distances, Gadfly\n",
      "\n",
      "# Min-max rescale selected columns of a DataFrame.\n",
      "#\n",
      "# Arguments:\n",
      "#   input_df - source table; it is left untouched (a deep copy is returned)\n",
      "#   cols     - integer indices of the columns to rescale\n",
      "#\n",
      "# Returns a copy of `input_df` in which every listed column is mapped through\n",
      "# (x - min) / (max - min). A column whose values are all equal has a zero\n",
      "# range; it is mapped to all zeros instead of being filled with NaN.\n",
      "function normalize(input_df::DataFrame, cols::Array{Int})\n",
      "    norm_df = deepcopy(input_df)\n",
      "    for i in cols\n",
      "        col = input_df[i]\n",
      "        lo = minimum(col)\n",
      "        span = maximum(col) - lo\n",
      "        # A zero range would turn the whole column into NaN (0 / 0), so divide\n",
      "        # by one instead. `isequal` always yields a Bool, so a range that is not\n",
      "        # an ordinary number simply takes the plain division path.\n",
      "        divisor = isequal(span, 0) ? one(span) : span\n",
      "        norm_df[i] = (col - lo) / divisor\n",
      "    end\n",
      "    norm_df\n",
      "end\n",
      "\n",
      "# import data\n",
      "\n",
      "dating_df = readtable(\"datingTestSet2.txt\", separator='\\t', header=false);\n",
      "names!(dating_df, [:miles, :games, :ice_cream, :opinion])\n",
      "head(dating_df)\n",
      "\n",
      "# `[1:3]` is only flattened into an index vector by old Julia releases;\n",
      "# `collect` builds the Array{Int} that `normalize` expects on every version.\n",
      "norm_df = normalize(dating_df, collect(1:3));\n",
      "head(norm_df)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "
miles | games | ice_cream | opinion | |
---|---|---|---|---|
1 | 0.44832535360950115 | 0.39805139251704247 | 0.562333528687216 | 3 |
2 | 0.15873259342850568 | 0.3419546659888891 | 0.9872441587123406 | 2 |
3 | 0.2854294260076912 | 0.06892523280719681 | 0.47449628503016766 | 1 |
4 | 0.8232007274878661 | 0.6284800736390028 | 0.2524892865215854 | 1 |
5 | 0.4201023303715228 | 0.07982026591745278 | 0.07857829588853851 | 1 |
6 | 0.7997217139789423 | 0.4848018932137898 | 0.608960546188209 | 1 |