{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Taking `examples/examples.ipynb` as a starting point. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "sys.path.append(\"..\")\n",
    "sys.path.append(\"../..\")\n",
    "\n",
    "import numpy as np \n",
    "import pandas as pd\n",
    "import yellowbrick as yb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from yellowbrick.features import (ParallelCoordinates,\n",
    "                                  parallel_coordinates)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from download import download_all\n",
    "\n",
    "## The path to the test data sets\n",
    "FIXTURES = os.path.join(os.getcwd(), \"data\")\n",
    "\n",
    "## Dataset loading mechanisms\n",
    "datasets = {\n",
    "    \"credit\": os.path.join(FIXTURES, \"credit\", \"credit.csv\"),\n",
    "    \"concrete\": os.path.join(FIXTURES, \"concrete\", \"concrete.csv\"),\n",
    "    \"occupancy\": os.path.join(FIXTURES, \"occupancy\", \"occupancy.csv\"),\n",
    "    \"mushroom\": os.path.join(FIXTURES, \"mushroom\", \"mushroom.csv\"),\n",
    "}\n",
    "\n",
    "def load_data(name, download=True):\n",
    "    \"\"\"\n",
    "    Loads the named dataset from disk into a pandas DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Key of the module-level ``datasets`` mapping.\n",
    "    download : bool, default True\n",
    "        When the dataset's CSV is missing on disk, call ``download_all()``\n",
    "        before reading it; when False, raise ``ValueError`` instead.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The result of ``pd.read_csv`` on the dataset's path.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If ``name`` is not a key of ``datasets``.\n",
    "    ValueError\n",
    "        If the CSV does not exist and ``download`` is False.\n",
    "    \"\"\"\n",
    "    # Resolve the path; on a bad name, report the valid choices rather\n",
    "    # than surfacing a bare KeyError from the dict lookup\n",
    "    try:\n",
    "        path = datasets[name]\n",
    "    except KeyError:\n",
    "        raise KeyError(\n",
    "            \"unknown dataset '{}', expected one of: {}\".format(\n",
    "                name, \", \".join(sorted(datasets))\n",
    "            )\n",
    "        )\n",
    "\n",
    "    # Check if the data exists, otherwise download or raise\n",
    "    if not os.path.exists(path):\n",
    "        if not download:\n",
    "            raise ValueError((\n",
    "                \"'{}' dataset has not been downloaded, \"\n",
    "                \"use the download.py module to fetch datasets\"\n",
    "            ).format(name))\n",
    "        download_all()\n",
    "\n",
    "    # Return the data frame\n",
    "    return pd.read_csv(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20560\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "
| \n", " | datetime | \n", "temperature | \n", "relative humidity | \n", "light | \n", "C02 | \n", "humidity | \n", "occupancy | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "2015-02-04 17:51:00 | \n", "23.18 | \n", "27.2720 | \n", "426.0 | \n", "721.25 | \n", "0.004793 | \n", "1 | \n", "
| 1 | \n", "2015-02-04 17:51:59 | \n", "23.15 | \n", "27.2675 | \n", "429.5 | \n", "714.00 | \n", "0.004783 | \n", "1 | \n", "
| 2 | \n", "2015-02-04 17:53:00 | \n", "23.15 | \n", "27.2450 | \n", "426.0 | \n", "713.50 | \n", "0.004779 | \n", "1 | \n", "
| 3 | \n", "2015-02-04 17:54:00 | \n", "23.15 | \n", "27.2000 | \n", "426.0 | \n", "708.25 | \n", "0.004772 | \n", "1 | \n", "
| 4 | \n", "2015-02-04 17:55:00 | \n", "23.10 | \n", "27.2000 | \n", "426.0 | \n", "704.50 | \n", "0.004757 | \n", "1 | \n", "