{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "application/javascript": [ "if(window['d3'] === undefined ||\n", " window['Nyaplot'] === undefined){\n", " var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n", "\n", "\n", "\n", " var shim = {\"d3\":{\"exports\":\"d3\"},\"downloadable\":{\"exports\":\"downloadable\"}};\n", "\n", " require.config({paths: path, shim:shim});\n", "\n", "\n", "require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\n", "\n", "\tvar script = d3.select(\"head\")\n", "\t .append(\"script\")\n", "\t .attr(\"src\", \"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n", "\t .attr(\"async\", true);\n", "\n", "\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n", "\n", "\n", "\t var event = document.createEvent(\"HTMLEvents\");\n", "\t event.initEvent(\"load_nyaplot\",false,false);\n", "\t window.dispatchEvent(event);\n", "\t console.log('Finished loading Nyaplotjs');\n", "\n", "\t};\n", "\n", "\n", "});});\n", "}\n" ], "text/plain": [ "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\\\",\\\"downloadable\\\":\\\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading 
downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\"" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "true" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "require '~/workspace/daru/lib/daru.rb'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Categorical Vector Visualization" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ ":category" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dv = Daru::Vector.new ['III']*10 + ['II']*5 + ['I']*5, type: :category, categories: ['I', 'II', 'III']\n", "dv.type" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Bar graph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1. Frequency (count)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:bar, :options=>{:x=>\"data0\", :y=>\"data1\"}, :data=>\"41b5e0de-6d64-4c48-8f5b-0b8f86156fde\"}, @xrange=[\"I\", \"II\", \"III\"], @yrange=[0, 10]>], :options=>{:x_label=>\"Categories\", :y_label=>\"Frequency\", :width=>700, :xrange=>[\"I\", \"II\", \"III\"], :yrange=>[0, 10]}}>], :data=>{\"41b5e0de-6d64-4c48-8f5b-0b8f86156fde\"=>#\"I\", :data1=>5}, {:data0=>\"II\", :data1=>5}, {:data0=>\"III\", :data1=>10}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dv.plot(type: :bar) do |p, d|\n", " p.x_label 'Categories'\n", " p.y_label 'Frequency'\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2. Percentage" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:bar, :options=>{:x=>\"data0\", :y=>\"data1\"}, :data=>\"23a81301-3d6d-443f-a92a-eb381c761094\"}, @xrange=[\"I\", \"II\", \"III\"], @yrange=[0, 50.0]>], :options=>{:yrange=>[0, 100], :x_label=>\"Categories\", :y_label=>\"Percentage (%)\", :width=>700, :xrange=>[\"I\", \"II\", \"III\"]}}>], :data=>{\"23a81301-3d6d-443f-a92a-eb381c761094\"=>#\"I\", :data1=>25.0}, {:data0=>\"II\", :data1=>25.0}, {:data0=>\"III\", :data1=>50.0}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dv.plot(type: :bar, method: :percentage) do |p, d|\n", " p.x_label 'Categories'\n", " p.y_label 'Percentage (%)'\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3. Fraction" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:bar, :options=>{:x=>\"data0\", :y=>\"data1\"}, :data=>\"c21d60da-c9dc-484d-a188-396ea9462142\"}, @xrange=[\"I\", \"II\", \"III\"], @yrange=[0, 0.5]>], :options=>{:yrange=>[0, 1], :x_label=>\"Categories\", :y_label=>\"Fraction\", :width=>700, :xrange=>[\"I\", \"II\", \"III\"]}}>], :data=>{\"c21d60da-c9dc-484d-a188-396ea9462142\"=>#\"I\", :data1=>0.25}, {:data0=>\"II\", :data1=>0.25}, {:data0=>\"III\", :data1=>0.5}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dv.plot(type: :bar, method: :fraction) do |p, d|\n", " p.x_label 'Categories'\n", " p.y_label 'Fraction'\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Categorical data visualization in Dataframe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Bar Graph" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ ":category" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = Daru::DataFrame.new({\n", " a: [1, 2, 4, -2, 5, 23, 0],\n", " b: [3, 1, 3, -6, 2, 1, 0],\n", " c: ['I', 'II', 'I', 'III', 'I', 'III', 'II']\n", " })\n", "df.to_category :c\n", "df[:c].type" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:bar, :options=>{:value=>:c}, :data=>\"2a8288b2-af59-4e04-b4c5-cb4dbdd9e29a\"}, @xrange=[\"I\", \"II\", \"III\"], @yrange=[0, 7]>], :options=>{:width=>700, :xrange=>[\"I\", \"II\", \"III\"], :yrange=>[0, 7]}}>], :data=>{\"2a8288b2-af59-4e04-b4c5-cb4dbdd9e29a\"=>#1, :b=>3, :c=>\"I\"}, {:a=>2, :b=>1, :c=>\"II\"}, {:a=>4, :b=>3, :c=>\"I\"}, {:a=>-2, :b=>-6, :c=>\"III\"}, {:a=>5, :b=>2, :c=>\"I\"}, {:a=>23, :b=>1, :c=>\"III\"}, {:a=>0, :b=>0, :c=>\"II\"}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot(type: :bar, x: :c)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scatter plot categorized by categorical variable" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plots can be categorized by\n", "- Color\n", "- Size\n", "- Shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ ":category" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = Daru::DataFrame.new({\n", " a: [1, 2, 4, -2, 5, 23, 0],\n", " b: [3, 1, 3, -6, 2, 1, 0],\n", " c: ['I', 'II', 'I', 'III', 'I', 'III', 'II']\n", " })\n", "df.to_category :c\n", "df[:c].type" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Below are a few examples:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"I\", :color=>\"rgb(179,226,205)\", :tooltip_contents=>[\"I\", \"I\", \"I\"]}, :data=>\"a5ee171d-6199-4413-aaf5-7fad7c506839\"}, @xrange=[1, 5], @yrange=[2, 3]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"II\", :color=>\"rgb(253,205,172)\", :tooltip_contents=>[\"II\", \"II\"]}, :data=>\"3d026663-dddc-4c35-9076-696d5fe3596f\"}, @xrange=[0, 2], @yrange=[0, 1]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"III\", :color=>\"rgb(203,213,232)\", :tooltip_contents=>[\"III\", \"III\"]}, :data=>\"41580b79-f4ab-4e0a-ba5a-d7aeeb24ecdd\"}, @xrange=[-2, 23], @yrange=[-6, 1]>], :options=>{:legend=>true, :xrange=>[-10, 10], :yrange=>[-10, 10], :zoom=>true, :width=>800}}>], :data=>{\"a5ee171d-6199-4413-aaf5-7fad7c506839\"=>#1, :b=>3}, {:a=>4, :b=>3}, {:a=>5, :b=>2}]>, \"3d026663-dddc-4c35-9076-696d5fe3596f\"=>#2, :b=>1}, {:a=>0, :b=>0}]>, \"41580b79-f4ab-4e0a-ba5a-d7aeeb24ecdd\"=>#-2, :b=>-6}, {:a=>23, :b=>1}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot(type: :scatter, x: :a, y: :b, categorized: {by: :c, method: :color}) do |p, d|\n", " p.xrange [-10, 10]\n", " p.yrange [-10, 10]\n", "end" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"I\", :shape=>\"circle\", :tooltip_contents=>[\"I\", \"I\", \"I\"]}, :data=>\"f01ebd28-0bf9-4c49-aba4-2c6cc0e6f279\"}, @xrange=[1, 5], @yrange=[2, 3]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"II\", :shape=>\"triangle-up\", :tooltip_contents=>[\"II\", \"II\"]}, :data=>\"11b9c546-4b02-4ede-976e-53559f28d7a9\"}, @xrange=[0, 2], @yrange=[0, 1]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"III\", :shape=>\"diamond\", :tooltip_contents=>[\"III\", \"III\"]}, :data=>\"aa4fda41-bfc8-4269-8e85-1e537099a555\"}, @xrange=[-2, 23], @yrange=[-6, 1]>], :options=>{:legend=>true, :xrange=>[-10, 10], :yrange=>[-10, 10], :zoom=>true, :width=>800}}>], :data=>{\"f01ebd28-0bf9-4c49-aba4-2c6cc0e6f279\"=>#1, :b=>3}, {:a=>4, :b=>3}, {:a=>5, :b=>2}]>, \"11b9c546-4b02-4ede-976e-53559f28d7a9\"=>#2, :b=>1}, {:a=>0, :b=>0}]>, \"aa4fda41-bfc8-4269-8e85-1e537099a555\"=>#-2, :b=>-6}, {:a=>23, :b=>1}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot(type: :scatter, x: :a, y: :b, categorized: {by: :c, method: :shape}) do |p, d|\n", " p.xrange [-10, 10]\n", " p.yrange [-10, 10]\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One can also specify custom colors, sizes, and shapes. For example:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"I\", :color=>:red, :tooltip_contents=>[\"I\", \"I\", \"I\"]}, :data=>\"d75d5cd0-1060-49c4-acbe-add4fe8b0eb7\"}, @xrange=[1, 5], @yrange=[2, 3]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"II\", :color=>:blue, :tooltip_contents=>[\"II\", \"II\"]}, :data=>\"7385f28f-7e9a-4581-b40d-ae87146c2ce0\"}, @xrange=[0, 2], @yrange=[0, 1]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"III\", :color=>:green, :tooltip_contents=>[\"III\", \"III\"]}, :data=>\"c8ffffd3-d0a9-40b7-a27f-5e4d993910ad\"}, @xrange=[-2, 23], @yrange=[-6, 1]>], :options=>{:legend=>true, :xrange=>[-10, 10], :yrange=>[-10, 10], :zoom=>true, :width=>800}}>], :data=>{\"d75d5cd0-1060-49c4-acbe-add4fe8b0eb7\"=>#1, :b=>3}, {:a=>4, :b=>3}, {:a=>5, :b=>2}]>, \"7385f28f-7e9a-4581-b40d-ae87146c2ce0\"=>#2, :b=>1}, {:a=>0, :b=>0}]>, \"c8ffffd3-d0a9-40b7-a27f-5e4d993910ad\"=>#-2, :b=>-6}, {:a=>23, :b=>1}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot(type: :scatter, x: :a, y: :b, categorized: {by: :c, method: :color, color: [:red, :blue, :green]}) do |p, d|\n", " p.xrange [-10, 10]\n", " p.yrange [-10, 10]\n", "end" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"I\", :size=>300, :tooltip_contents=>[\"I\", \"I\", \"I\"]}, :data=>\"365ceaf6-2520-4063-9060-93d50810e4d7\"}, @xrange=[1, 5], @yrange=[2, 3]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"II\", :size=>600, :tooltip_contents=>[\"II\", \"II\"]}, :data=>\"bff6fee3-7a63-4148-b680-13be0dd7c5ce\"}, @xrange=[0, 2], @yrange=[0, 1]>, #:scatter, :options=>{:x=>:a, :y=>:b, :title=>\"III\", :size=>900, :tooltip_contents=>[\"III\", \"III\"]}, :data=>\"5b8eaee6-0571-452f-ae84-0b8d31ef526b\"}, @xrange=[-2, 23], @yrange=[-6, 1]>], :options=>{:legend=>true, :xrange=>[-10, 10], :yrange=>[-10, 10], :zoom=>true, :width=>800}}>], :data=>{\"365ceaf6-2520-4063-9060-93d50810e4d7\"=>#1, :b=>3}, {:a=>4, :b=>3}, {:a=>5, :b=>2}]>, \"bff6fee3-7a63-4148-b680-13be0dd7c5ce\"=>#2, :b=>1}, {:a=>0, :b=>0}]>, \"5b8eaee6-0571-452f-ae84-0b8d31ef526b\"=>#-2, :b=>-6}, {:a=>23, :b=>1}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot(type: :scatter, x: :a, y: :b, categorized: {by: :c, method: :size, size: [300, 600, 900]}) do |p, d|\n", " p.xrange [-10, 10]\n", " p.yrange [-10, 10]\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Line plot categorized by categorical variable" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Line plots work similarly to the scatter plots above and accept the same options, except that they are categorized by **stroke_width** instead of **size**." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ ":category" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = Daru::DataFrame.new({\n", " a: [1, 2, 3, 4, 5, 6, 7, 8, 9],\n", " b: [2, 4, 6, 1, 3, 5, 6, 4, 3],\n", " c: ['I']*3 + ['II']*3 + ['III']*3\n", " })\n", "df.to_category :c\n", "df[:c].type" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:line, :options=>{:x=>:a, :y=>:b, :title=>\"I\", :color=>\"rgb(179,226,205)\"}, :data=>\"58c6d56c-9383-406b-a556-6f72562c0995\"}, @xrange=[1, 3], @yrange=[2, 6]>, #:line, :options=>{:x=>:a, :y=>:b, :title=>\"II\", :color=>\"rgb(253,205,172)\"}, :data=>\"03c5fc57-9f64-4075-909a-8a26417b5e7e\"}, @xrange=[4, 6], @yrange=[1, 5]>, #:line, :options=>{:x=>:a, :y=>:b, :title=>\"III\", :color=>\"rgb(203,213,232)\"}, :data=>\"ef81c6dd-e95b-4a18-8226-1bfb0301e448\"}, @xrange=[7, 9], @yrange=[3, 6]>], :options=>{:legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 9], :yrange=>[1, 6]}}>], :data=>{\"58c6d56c-9383-406b-a556-6f72562c0995\"=>#1, :b=>2}, {:a=>2, :b=>4}, {:a=>3, :b=>6}]>, \"03c5fc57-9f64-4075-909a-8a26417b5e7e\"=>#4, :b=>1}, {:a=>5, :b=>3}, {:a=>6, :b=>5}]>, \"ef81c6dd-e95b-4a18-8226-1bfb0301e448\"=>#7, :b=>6}, {:a=>8, :b=>4}, {:a=>9, :b=>3}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot type: :line, x: :a, y: :b, categorized: {by: :c, method: :color} do end" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n" ], "text/plain": [ "#[#[#:line, :options=>{:x=>:a, :y=>:b, :title=>\"I\", :stroke_width=>2}, :data=>\"a2060ac5-73a0-4391-b907-3c8f97627222\"}, @xrange=[1, 3], @yrange=[2, 6]>, #:line, :options=>{:x=>:a, :y=>:b, :title=>\"II\", :stroke_width=>4}, :data=>\"2315e956-2857-45e0-9438-cf7041aa55f1\"}, @xrange=[4, 6], @yrange=[1, 5]>, #:line, :options=>{:x=>:a, :y=>:b, :title=>\"III\", :stroke_width=>6}, :data=>\"a98a9383-8a5e-4106-9e09-0047130b2265\"}, @xrange=[7, 9], @yrange=[3, 6]>], :options=>{:legend=>true, :xrange=>[-10, 10], :yrange=>[-10, 10], :zoom=>true, :width=>800}}>], :data=>{\"a2060ac5-73a0-4391-b907-3c8f97627222\"=>#1, :b=>2}, {:a=>2, :b=>4}, {:a=>3, :b=>6}]>, \"2315e956-2857-45e0-9438-cf7041aa55f1\"=>#4, :b=>1}, {:a=>5, :b=>3}, {:a=>6, :b=>5}]>, \"a98a9383-8a5e-4106-9e09-0047130b2265\"=>#7, :b=>6}, {:a=>8, :b=>4}, {:a=>9, :b=>3}]>}, :extension=>[]}>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot type: :line, x: :a, y: :b, categorized: {by: :c, method: :stroke_width} do |p, d|\n", " p.xrange [-10, 10]\n", " p.yrange [-10, 10] \n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Ruby 2.3.0", "language": "ruby", "name": "ruby" }, "language_info": { "file_extension": ".rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.3.0" } }, "nbformat": 4, "nbformat_minor": 0 }