{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# The Blaze Ecosystem\n", "http://blaze.pydata.org/\n", "\n", "The Blaze ecosystem is a set of libraries that help users store, describe, query and process data. It is composed of the following core projects:\n", "\n", "* [Blaze](http://blaze.readthedocs.io/en/latest/index.html): An interface to query data on different storage systems\n", "* Dask: Parallel computing through task scheduling and blocked algorithms\n", "* Datashape: A data description language\n", "* DyND: A C++ library for dynamic, multidimensional arrays\n", "* Odo: Data migration between different storage systems\n", "\n", "**Github:** https://github.com/blaze/blaze.git\n", "\n", "**Blog:** \n", "* Blaze: http://blaze.pydata.org/pages/talks/ep2015-blaze/\n", "* Github Analyze with Dask: http://blaze.pydata.org/blog/2016/02/17/dask-distributed-1/\n" ] }, { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [ "%%!\n", "cd ..\n", "source activate GISpark\n", "git clone https://github.com/blaze/blaze.git\n", "cd blaze\n", "python setup.py install " ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from blaze import *\n", "#from blaze import compute\n", "#from blaze import data\n", "#from blaze.utils import example\n", "from blaze import examples" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on package blaze:\n", "\n", "NAME\n", " blaze\n", "\n", "PACKAGE CONTENTS\n", " _version\n", " cached\n", " compatibility\n", " compute (package)\n", " dispatch\n", " expr (package)\n", " index\n", " interactive\n", " mongo\n", " partition\n", " server (package)\n", " sql\n", " tests (package)\n", " utils\n", "\n", "SUBMODULES\n", " datetime\n", " examples\n", "\n", "DATA\n", " Broadcastable = (, , , , \n", " absolute_import = _Feature((2, 5, 0, 'alpha', 
1), (3, 0, 0, 'alpha', 0...\n", " acos = \n", " acosh = \n", " all = \n", " all_formats = frozenset({SerializationFormat(name='json', loads=\n", " api = \n", " append = \n", " asin = \n", " asinh = \n", " atan = \n", " atan2 = \n", " atanh = \n", " by = \n", " ceil = \n", " compute = \n", " compute_down = \n", " compute_up = \n", " convert = \n", " copysign = \n", " cos = \n", " cosh = \n", " create_index = \n", " degrees = \n", " discover = \n", " dispatch = functools.partial(\n", " exp = \n", " expm1 = \n", " floor = \n", " fmod = \n", " greatest = \n", " hypot = \n", " i = 4\n", " inf = inf\n", " into = \n", " isnan = \n", " json_format = SerializationFormat(name='json', loads=\n", " least = \n", " log = \n", " log10 = \n", " log1p = \n", " max = \n", " mean = \n", " min = \n", " msgpack_format = SerializationFormat(name='msgpack', loads=functo...x7...\n", " nan = nan\n", " optimize = \n", " pickle_format = SerializationFormat(name='pickle', loads=\n", " print_function = _Feature((2, 6, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0)...\n", " radians = \n", " resource = \n", " rowfunc = \n", " shape = \n", " sin = \n", " sinh = \n", " sqrt = \n", " std = \n", " sum = \n", " tan = \n", " tanh = \n", " to_html = \n", " trunc = \n", " var = \n", "\n", "VERSION\n", " 0.9.1\n", "\n", "FILE\n", " /home/supermap/anaconda3/envs/GISpark/lib/python3.5/site-packages/blaze/__init__.py\n", "\n", "\n" ] } ], "source": [ "help(blaze)\n", "#help(examples)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "5" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "js = JSON(example('accounts.json'))\n", "s = symbol('s', discover(js))\n", "compute(s.count(), js)\n", "\n", "#jss = JSONLines(example('accounts-streaming.json'))\n", "#compute(s.count(), jss)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { 
"text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamebalance
01Alice100
12Bob-200
23Charlie300
34Denis400
45Edith-500
" ], "text/plain": [ " id name balance\n", "0 1 Alice 100\n", "1 2 Bob -200\n", "2 3 Charlie 300\n", "3 4 Denis 400\n", "4 5 Edith -500" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t = Data([(1, 'Alice', 100),\n", " (2, 'Bob', -200),\n", " (3, 'Charlie', 300),\n", " (4, 'Denis', 400),\n", " (5, 'Edith', -500)],\n", " fields=['id', 'name', 'balance'])\n", "#help(t)\n", "t" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
55.43.91.70.4Iris-setosa
64.63.41.40.3Iris-setosa
75.03.41.50.2Iris-setosa
84.42.91.40.2Iris-setosa
94.93.11.50.1Iris-setosa
105.43.71.50.2Iris-setosa
" ], "text/plain": [ " sepal_length sepal_width petal_length petal_width species\n", "0 5.1 3.5 1.4 0.2 Iris-setosa\n", "1 4.9 3.0 1.4 0.2 Iris-setosa\n", "2 4.7 3.2 1.3 0.2 Iris-setosa\n", "3 4.6 3.1 1.5 0.2 Iris-setosa\n", "4 5.0 3.6 1.4 0.2 Iris-setosa\n", "5 5.4 3.9 1.7 0.4 Iris-setosa\n", "6 4.6 3.4 1.4 0.3 Iris-setosa\n", "7 5.0 3.4 1.5 0.2 Iris-setosa\n", "8 4.4 2.9 1.4 0.2 Iris-setosa\n", "9 4.9 3.1 1.5 0.1 Iris-setosa\n", "..." ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris = Data(example('iris.csv'))\n", "iris" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }