{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\config.py:161: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", " data = yaml.load(f.read()) or {}\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\statsmodels\\compat\\pandas.py:49: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", " data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)\n" ] } ], "source": [ "from optimus import Optimus" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "op = Optimus(\"dask_cudf\", comm=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\", sep=\",\", header=True, infer_schema='true', charset=\"UTF-8\").ext.cache()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.ext.display()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = df.ext.send(output=\"json\", infer=False, advanced_stats=False)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'count_uniques': {'price': 6.000274674963478}}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"df.cols.count_uniques(\"*\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.count_na(\"*\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "unsupported operand type(s) for ** or pow(): 'Array' and 'float'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0moptimus\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mtimed\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtimed\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"{name}() executed in {time} sec\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0m_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, mismatch, advanced_stats)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_columns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m output = self.dataset(df, columns, buckets, infer, 
relative_error, approx_count, format=\"dict\",\n\u001b[1;32m---> 74\u001b[1;33m mismatch=mismatch, advanced_stats=advanced_stats)\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[1;31m# Load jinja\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mdataset\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format, mismatch, advanced_stats)\u001b[0m\n\u001b[0;32m 325\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcols_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 326\u001b[0m updated_columns = self.columns_stats(df, cols_to_profile, buckets, infer, relative_error, approx_count,\n\u001b[1;32m--> 327\u001b[1;33m mismatch, advanced_stats)\n\u001b[0m\u001b[0;32m 328\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[0moutput_columns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mupdate_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput_columns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mupdated_columns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_stats\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, mismatch, advanced_stats)\u001b[0m\n\u001b[0;32m 436\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[1;31m# Aggregation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 438\u001b[1;33m \u001b[0mstats\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuckets\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mapprox_count\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0madvanced_stats\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 439\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 440\u001b[0m \u001b[1;31m# Calculate Frequency\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_agg\u001b[1;34m(self, df, columns, buckets, relative_error, approx_count, advanced_stats)\u001b[0m\n\u001b[0;32m 493\u001b[0m funcs = [df.functions.stddev, df.functions.kurtosis, df.functions.mean, df.functions.skewness,\n\u001b[0;32m 494\u001b[0m df.functions.sum, df.functions.variance, df.functions.zeros_agg]\n\u001b[1;32m--> 495\u001b[1;33m \u001b[0mexprs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 496\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 497\u001b[0m \u001b[1;31m# TODO: None in basic calculation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\dask\\columns.py\u001b[0m in \u001b[0;36mcreate_exprs\u001b[1;34m(self, columns, funcs, *args)\u001b[0m\n\u001b[0;32m 537\u001b[0m 
\u001b[0mexprs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 538\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 539\u001b[1;33m \u001b[0mexprs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 540\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 541\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\functions.py\u001b[0m in \u001b[0;36m_kurtoris\u001b[1;34m(serie)\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mkurtosis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 133\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_kurtoris\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mserie\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 134\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;34m\"kurtosis\"\u001b[0m\u001b[1;33m:\u001b[0m 
\u001b[0mfloat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkurtosis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mserie\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 135\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\array\\stats.py\u001b[0m in \u001b[0;36mkurtosis\u001b[1;34m(a, axis, fisher, bias, nan_policy)\u001b[0m\n\u001b[0;32m 227\u001b[0m \u001b[0molderr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseterr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 228\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 229\u001b[1;33m \u001b[0mvals\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mda\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzero\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mm4\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mm2\u001b[0m\u001b[1;33m**\u001b[0m\u001b[1;36m2.0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 230\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 231\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseterr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0molderr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTypeError\u001b[0m: 
unsupported operand type(s) for ** or pow(): 'Array' and 'float'" ] } ], "source": [ "from optimus.profiler.profiler import Profiler\n", "p = Profiler()\n", "p.run(df)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Open Bumblebee: https://app.hi-bumblebee.com
If you really care about privacy get your keys in bumblebee.ini and put them here
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "op= Optimus(\"dask\", comm=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# url = \"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/crime.csv\"\n", "url = \"data/crime.csv\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
0I182070945619LarcenyLARCENY ALL OTHERSD14808NaN2018-09-02 13:00:0020189Sunday13Part OneLINCOLN ST42.357791-71.139371(42.35779134, -71.13937053)
1I1820709431402VandalismVANDALISMC11347NaN2018-08-21 00:00:0020188Tuesday0Part TwoHECLA ST42.306821-71.060300(42.30682138, -71.06030035)
2I1820709413410TowedTOWED MOTOR VEHICLED4151NaN2018-09-03 19:27:0020189Monday19Part ThreeCAZENOVE ST42.346589-71.072429(42.34658879, -71.07242943)
3I1820709403114Investigate PropertyINVESTIGATE PROPERTYD4272NaN2018-09-03 21:16:0020189Monday21Part ThreeNEWCOMB ST42.334182-71.078664(42.33418175, -71.07866441)
4I1820709383114Investigate PropertyINVESTIGATE PROPERTYB3421NaN2018-09-03 21:05:0020189Monday21Part ThreeDELHI ST42.275365-71.090361(42.27536542, -71.09036101)
5I1820709363820Motor Vehicle Accident ResponseM/V ACCIDENT INVOLVING PEDESTRIAN - INJURYC11398NaN2018-09-03 21:09:0020189Monday21Part ThreeTALBOT AVE42.290196-71.071590(42.29019621, -71.07159012)
6I182070933724Auto TheftAUTO THEFTB2330NaN2018-09-03 21:25:0020189Monday21Part OneNORMANDY ST42.306072-71.082733(42.30607218, -71.08273260)
7I1820709323301Verbal DisputesVERBAL DISPUTEB2584NaN2018-09-03 20:39:3720189Monday20Part ThreeLAWN ST42.327016-71.105551(42.32701648, -71.10555088)
8I182070931301RobberyROBBERY - STREETC6177NaN2018-09-03 20:48:0020189Monday20Part OneMASSACHUSETTS AVE42.331521-71.070853(42.33152148, -71.07085307)
9I1820709293301Verbal DisputesVERBAL DISPUTEC11364NaN2018-09-03 20:38:0020189Monday20Part ThreeLESLIE ST42.295147-71.058608(42.29514664, -71.05860832)
10I1820709283301Verbal DisputesVERBAL DISPUTEC6913NaN2018-09-03 19:55:0020189Monday19Part ThreeOCEAN VIEW DR42.319579-71.040328(42.31957856, -71.04032766)
11I1820709273114Investigate PropertyINVESTIGATE PROPERTYC6936NaN2018-09-03 20:19:0020189Monday20Part ThreeDALESSIO CT42.340115-71.053390(42.34011469, -71.05339029)
12I1820709233108Fire Related ReportsFIRE REPORT - HOUSE, BUILDING, ETC.D4139NaN2018-09-03 19:58:0020189Monday19Part ThreeMARLBOROUGH ST42.350388-71.087853(42.35038760, -71.08785290)
13I1820709222647OtherTHREATS TO DO BODILY HARMB3429NaN2018-09-03 20:39:0020189Monday20Part TwoWOODROW AVE42.286470-71.087147(42.28647012, -71.08714661)
14I1820709213201Property LostPROPERTY - LOSTB3469NaN2018-09-02 14:00:0020189Sunday14Part ThreeMULVEY ST42.279241-71.096674(42.27924052, -71.09667382)
15I1820709203006Medical AssistanceSICK/INJURED/MEDICAL - PERSONNaNNaN2018-09-03 19:43:0020189Monday19Part ThreeNaN42.352875-71.073830(42.35287456, -71.07382970)
16I1820709193301Verbal DisputesVERBAL DISPUTEC11341NaN2018-09-03 18:52:0020189Monday18Part ThreeSTONEHURST ST42.305264-71.066838(42.30526428, -71.06683755)
17I1820709183305Assembly or Gathering ViolationsDEMONSTRATIONS/RIOTD4130NaN2018-09-03 17:00:0020189Monday17Part ThreeHUNTINGTON AVE42.348577-71.077720(42.34857652, -71.07772012)
18I1820709172647OtherTHREATS TO DO BODILY HARMB2901NaN2018-09-03 19:52:0020189Monday19Part TwoHORADAN WAY42.333717-71.096658(42.33371742, -71.09665806)
19I182070915614Larceny From Motor VehicleLARCENY THEFT FROM MV - NON-ACCESSORYB2181NaN2018-09-02 18:00:0020189Sunday18Part OneSHIRLEY ST42.325695-71.068168(42.32569490, -71.06816778)
20I1820709133006Medical AssistanceSICK/INJURED/MEDICAL - PERSONNaNNaN2018-09-03 18:46:0020189Monday18Part ThreeWOLCOTT-1.000000-1.000000(-1.00000000, -1.00000000)
21I1820709113801Motor Vehicle Accident ResponseM/V ACCIDENT - OTHERA169NaN2018-09-03 18:30:0020189Monday18Part ThreeBEACON ST42.355644-71.071681(42.35564426, -71.07168077)
22I1820709103006Medical AssistanceSICK/INJURED/MEDICAL - PERSONB3434NaN2018-09-03 18:42:0020189Monday18Part ThreeCAPEN ST42.283402-71.080797(42.28340243, -71.08079740)
23I1820709093803Motor Vehicle Accident ResponseM/V ACCIDENT - PERSONAL INJURYE5550NaN2018-09-03 18:33:0020189Monday18Part ThreeWASHINGTON ST42.275818-71.139913(42.27581799, -71.13991259)
24I182070908522Residential BurglaryBURGLARY - RESIDENTIAL - NO FORCEB2911NaN2018-09-03 18:38:0020189Monday18Part OneANNUNCIATION RD42.335062-71.093168(42.33506218, -71.09316781)
25I1820709063831Motor Vehicle Accident ResponseM/V - LEAVING SCENE - PROPERTY DAMAGENaNNaN2018-09-03 18:20:0020189Monday18Part ThreeNaN42.283593-71.055657(42.28359328, -71.05565683)
26I1820709053006Medical AssistanceSICK/INJURED/MEDICAL - PERSOND4172NaN2018-09-03 18:50:0020189Monday18Part ThreeMASSACHUSETTS AVE42.333112-71.072764(42.33311189, -71.07276370)
27I182070904802Simple AssaultASSAULT SIMPLE - BATTERYC11242NaN2018-09-03 18:34:0020189Monday18Part TwoANNAPOLIS ST42.317319-71.061509(42.31731905, -71.06150882)
28I1820709042007Restraining Order ViolationsVIOL. OF RESTRAINING ORDER W NO ARRESTC11242NaN2018-09-03 18:34:0020189Monday18Part TwoANNAPOLIS ST42.317319-71.061509(42.31731905, -71.06150882)
29I1820709032900OtherVAL - VIOLATION OF AUTO LAW - OTHERB3463NaN2018-09-03 18:55:0020189Monday18Part TwoBLUE HILL AVE42.295904-71.087733(42.29590385, -71.08773294)
......................................................
319043I110551302-003125Warrant ArrestsWARRANT ARRESTD4171NaN2015-07-22 22:00:0020157Wednesday22Part ThreeHARRISON AVE42.335560-71.074364(42.33555954, -71.07436364)
319044I110551302-00623LarcenyLARCENY SHOPLIFTING $50 TO $199D4171NaN2015-07-22 22:00:0020157Wednesday22Part OneHARRISON AVE42.335560-71.074364(42.33555954, -71.07436364)
319045I110372326-00403Aggravated AssaultASSAULT & BATTERY D/W - OTHERA197NaN2016-06-14 09:40:0020166Tuesday9Part OneSCHOOL ST42.357428-71.058326(42.35742837, -71.05832551)
319046I110372326-003125Warrant ArrestsWARRANT ARRESTA197NaN2016-06-14 09:40:0020166Tuesday9Part ThreeSCHOOL ST42.357428-71.058326(42.35742837, -71.05832551)
319047I110261417-003125Warrant ArrestsWARRANT ARRESTB2324NaN2016-07-29 00:00:0020167Friday0Part ThreeBOWDOIN ST42.307038-71.066153(42.30703835, -71.06615319)
319048I110261417-00619LarcenyLARCENY OTHER $200 & OVERB2324NaN2016-07-29 00:00:0020167Friday0Part OneBOWDOIN ST42.307038-71.066153(42.30703835, -71.06615319)
319049I110177502-003125Warrant ArrestsWARRANT ARRESTB2318NaN2015-10-02 21:00:00201510Friday21Part ThreeHOMESTEAD ST42.311277-71.089093(42.31127726, -71.08909334)
319050I110177502-00802Simple AssaultASSAULT & BATTERYB2318NaN2015-10-02 21:00:00201510Friday21Part TwoHOMESTEAD ST42.311277-71.089093(42.31127726, -71.08909334)
319051I110177502-003125Warrant ArrestsWARRANT ARRESTB2318NaN2015-10-02 21:00:00201510Friday21Part ThreeHOMESTEAD ST42.311277-71.089093(42.31127726, -71.08909334)
319052I100636670-00629LarcenyLARCENY OTHER $50 TO $199D4285NaN2016-06-05 17:23:0020166Sunday17Part OneCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319053I100636670-003125Warrant ArrestsWARRANT ARRESTD4285NaN2016-06-05 17:23:0020166Sunday17Part ThreeCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319054I100340225-003125Warrant ArrestsWARRANT ARRESTA177NaN2015-07-27 10:47:0020157Monday10Part ThreeBOWDOIN SQ42.361645-71.062299(42.36164502, -71.06229949)
319055I100340225-00339RobberyROBBERY - UNARMED - STREETA177NaN2015-07-27 10:47:0020157Monday10Part OneBOWDOIN SQ42.361645-71.062299(42.36164502, -71.06229949)
319056I100222105-023125Warrant ArrestsWARRANT ARRESTE13572NaN2015-08-03 16:22:0020158Monday16Part ThreeCOLUMBUS AVE42.313628-71.095603(42.31362799, -71.09560307)
319057I100033064-002907ViolationsVAL - OPERATING AFTER REV/SUSP.B2304NaN2016-07-29 18:20:0020167Friday18Part TwoSLAYTON WAY42.321770-71.097798(42.32177032, -71.09779774)
319058I100033064-002910ViolationsVAL - OPERATING AFTER REV/SUSP.B2304NaN2016-07-29 18:20:0020167Friday18Part TwoSLAYTON WAY42.321770-71.097798(42.32177032, -71.09779774)
319059I090321958-003125Warrant ArrestsWARRANT ARRESTC11355NaN2016-02-01 01:43:0020162Monday1Part ThreeGENEVA AVENaNNaN(0.00000000, 0.00000000)
319060I090321958-003125Warrant ArrestsWARRANT ARRESTC11355NaN2016-02-01 01:43:0020162Monday1Part ThreeGENEVA AVENaNNaN(0.00000000, 0.00000000)
319061I090317057-00403Aggravated AssaultASSAULT & BATTERY D/W - OTHERB3458NaN2015-11-20 11:15:00201511Friday11Part OneBLUE HILL AVE42.301897-71.085549(42.30189690, -71.08554944)
319062I090317057-003125Warrant ArrestsWARRANT ARRESTB3458NaN2015-11-20 11:15:00201511Friday11Part ThreeBLUE HILL AVE42.301897-71.085549(42.30189690, -71.08554944)
319063I080542626-003125Warrant ArrestsWARRANT ARRESTA1111NaN2015-08-12 12:00:0020158Wednesday12Part ThreeBOYLSTON ST42.352312-71.063705(42.35231190, -71.06370510)
319064I080542626-001848Drug ViolationDRUGS - POSS CLASS B - INTENT TO MFR DIST DISPA1111NaN2015-08-12 12:00:0020158Wednesday12Part TwoBOYLSTON ST42.352312-71.063705(42.35231190, -71.06370510)
319065I080542626-001849Drug ViolationDRUGS - POSS CLASS B - COCAINE, ETC.A1111NaN2015-08-12 12:00:0020158Wednesday12Part TwoBOYLSTON ST42.352312-71.063705(42.35231190, -71.06370510)
319066I060168073-001864Drug ViolationDRUGS - POSS CLASS D - INTENT MFR DIST DISPE13912NaN2018-01-27 14:01:0020181Saturday14Part TwoCENTRE ST42.322838-71.100967(42.32283759, -71.10096723)
319067I060168073-003125Warrant ArrestsWARRANT ARRESTE13912NaN2018-01-27 14:01:0020181Saturday14Part ThreeCENTRE ST42.322838-71.100967(42.32283759, -71.10096723)
319068I050310906-003125Warrant ArrestsWARRANT ARRESTD4285NaN2016-06-05 17:25:0020166Sunday17Part ThreeCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319069I030217815-08111HomicideMURDER, NON-NEGLIGIENT MANSLAUGHTERE18520NaN2015-07-09 13:38:0020157Thursday13Part OneRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319070I030217815-083125Warrant ArrestsWARRANT ARRESTE18520NaN2015-07-09 13:38:0020157Thursday13Part ThreeRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319071I010370257-003125Warrant ArrestsWARRANT ARRESTE13569NaN2016-05-31 19:35:0020165Tuesday19Part ThreeNEW WASHINGTON ST42.302333-71.111565(42.30233307, -71.11156487)
3190721420525503125Warrant ArrestsWARRANT ARRESTD4903NaN2015-06-22 00:12:0020156Monday0Part ThreeWASHINGTON ST42.333839-71.080290(42.33383935, -71.08029038)
\n", "

319073 rows × 17 columns

\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "0 I182070945 619 Larceny \n", "1 I182070943 1402 Vandalism \n", "2 I182070941 3410 Towed \n", "3 I182070940 3114 Investigate Property \n", "4 I182070938 3114 Investigate Property \n", "5 I182070936 3820 Motor Vehicle Accident Response \n", "6 I182070933 724 Auto Theft \n", "7 I182070932 3301 Verbal Disputes \n", "8 I182070931 301 Robbery \n", "9 I182070929 3301 Verbal Disputes \n", "10 I182070928 3301 Verbal Disputes \n", "11 I182070927 3114 Investigate Property \n", "12 I182070923 3108 Fire Related Reports \n", "13 I182070922 2647 Other \n", "14 I182070921 3201 Property Lost \n", "15 I182070920 3006 Medical Assistance \n", "16 I182070919 3301 Verbal Disputes \n", "17 I182070918 3305 Assembly or Gathering Violations \n", "18 I182070917 2647 Other \n", "19 I182070915 614 Larceny From Motor Vehicle \n", "20 I182070913 3006 Medical Assistance \n", "21 I182070911 3801 Motor Vehicle Accident Response \n", "22 I182070910 3006 Medical Assistance \n", "23 I182070909 3803 Motor Vehicle Accident Response \n", "24 I182070908 522 Residential Burglary \n", "25 I182070906 3831 Motor Vehicle Accident Response \n", "26 I182070905 3006 Medical Assistance \n", "27 I182070904 802 Simple Assault \n", "28 I182070904 2007 Restraining Order Violations \n", "29 I182070903 2900 Other \n", "... ... ... ... 
\n", "319043 I110551302-00 3125 Warrant Arrests \n", "319044 I110551302-00 623 Larceny \n", "319045 I110372326-00 403 Aggravated Assault \n", "319046 I110372326-00 3125 Warrant Arrests \n", "319047 I110261417-00 3125 Warrant Arrests \n", "319048 I110261417-00 619 Larceny \n", "319049 I110177502-00 3125 Warrant Arrests \n", "319050 I110177502-00 802 Simple Assault \n", "319051 I110177502-00 3125 Warrant Arrests \n", "319052 I100636670-00 629 Larceny \n", "319053 I100636670-00 3125 Warrant Arrests \n", "319054 I100340225-00 3125 Warrant Arrests \n", "319055 I100340225-00 339 Robbery \n", "319056 I100222105-02 3125 Warrant Arrests \n", "319057 I100033064-00 2907 Violations \n", "319058 I100033064-00 2910 Violations \n", "319059 I090321958-00 3125 Warrant Arrests \n", "319060 I090321958-00 3125 Warrant Arrests \n", "319061 I090317057-00 403 Aggravated Assault \n", "319062 I090317057-00 3125 Warrant Arrests \n", "319063 I080542626-00 3125 Warrant Arrests \n", "319064 I080542626-00 1848 Drug Violation \n", "319065 I080542626-00 1849 Drug Violation \n", "319066 I060168073-00 1864 Drug Violation \n", "319067 I060168073-00 3125 Warrant Arrests \n", "319068 I050310906-00 3125 Warrant Arrests \n", "319069 I030217815-08 111 Homicide \n", "319070 I030217815-08 3125 Warrant Arrests \n", "319071 I010370257-00 3125 Warrant Arrests \n", "319072 142052550 3125 Warrant Arrests \n", "\n", " OFFENSE_DESCRIPTION DISTRICT \\\n", "0 LARCENY ALL OTHERS D14 \n", "1 VANDALISM C11 \n", "2 TOWED MOTOR VEHICLE D4 \n", "3 INVESTIGATE PROPERTY D4 \n", "4 INVESTIGATE PROPERTY B3 \n", "5 M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY C11 \n", "6 AUTO THEFT B2 \n", "7 VERBAL DISPUTE B2 \n", "8 ROBBERY - STREET C6 \n", "9 VERBAL DISPUTE C11 \n", "10 VERBAL DISPUTE C6 \n", "11 INVESTIGATE PROPERTY C6 \n", "12 FIRE REPORT - HOUSE, BUILDING, ETC. 
D4 \n", "13 THREATS TO DO BODILY HARM B3 \n", "14 PROPERTY - LOST B3 \n", "15 SICK/INJURED/MEDICAL - PERSON NaN \n", "16 VERBAL DISPUTE C11 \n", "17 DEMONSTRATIONS/RIOT D4 \n", "18 THREATS TO DO BODILY HARM B2 \n", "19 LARCENY THEFT FROM MV - NON-ACCESSORY B2 \n", "20 SICK/INJURED/MEDICAL - PERSON NaN \n", "21 M/V ACCIDENT - OTHER A1 \n", "22 SICK/INJURED/MEDICAL - PERSON B3 \n", "23 M/V ACCIDENT - PERSONAL INJURY E5 \n", "24 BURGLARY - RESIDENTIAL - NO FORCE B2 \n", "25 M/V - LEAVING SCENE - PROPERTY DAMAGE NaN \n", "26 SICK/INJURED/MEDICAL - PERSON D4 \n", "27 ASSAULT SIMPLE - BATTERY C11 \n", "28 VIOL. OF RESTRAINING ORDER W NO ARREST C11 \n", "29 VAL - VIOLATION OF AUTO LAW - OTHER B3 \n", "... ... ... \n", "319043 WARRANT ARREST D4 \n", "319044 LARCENY SHOPLIFTING $50 TO $199 D4 \n", "319045 ASSAULT & BATTERY D/W - OTHER A1 \n", "319046 WARRANT ARREST A1 \n", "319047 WARRANT ARREST B2 \n", "319048 LARCENY OTHER $200 & OVER B2 \n", "319049 WARRANT ARREST B2 \n", "319050 ASSAULT & BATTERY B2 \n", "319051 WARRANT ARREST B2 \n", "319052 LARCENY OTHER $50 TO $199 D4 \n", "319053 WARRANT ARREST D4 \n", "319054 WARRANT ARREST A1 \n", "319055 ROBBERY - UNARMED - STREET A1 \n", "319056 WARRANT ARREST E13 \n", "319057 VAL - OPERATING AFTER REV/SUSP. B2 \n", "319058 VAL - OPERATING AFTER REV/SUSP. B2 \n", "319059 WARRANT ARREST C11 \n", "319060 WARRANT ARREST C11 \n", "319061 ASSAULT & BATTERY D/W - OTHER B3 \n", "319062 WARRANT ARREST B3 \n", "319063 WARRANT ARREST A1 \n", "319064 DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP A1 \n", "319065 DRUGS - POSS CLASS B - COCAINE, ETC. 
A1 \n", "319066 DRUGS - POSS CLASS D - INTENT MFR DIST DISP E13 \n", "319067 WARRANT ARREST E13 \n", "319068 WARRANT ARREST D4 \n", "319069 MURDER, NON-NEGLIGIENT MANSLAUGHTER E18 \n", "319070 WARRANT ARREST E18 \n", "319071 WARRANT ARREST E13 \n", "319072 WARRANT ARREST D4 \n", "\n", " REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "0 808 NaN 2018-09-02 13:00:00 2018 9 Sunday \n", "1 347 NaN 2018-08-21 00:00:00 2018 8 Tuesday \n", "2 151 NaN 2018-09-03 19:27:00 2018 9 Monday \n", "3 272 NaN 2018-09-03 21:16:00 2018 9 Monday \n", "4 421 NaN 2018-09-03 21:05:00 2018 9 Monday \n", "5 398 NaN 2018-09-03 21:09:00 2018 9 Monday \n", "6 330 NaN 2018-09-03 21:25:00 2018 9 Monday \n", "7 584 NaN 2018-09-03 20:39:37 2018 9 Monday \n", "8 177 NaN 2018-09-03 20:48:00 2018 9 Monday \n", "9 364 NaN 2018-09-03 20:38:00 2018 9 Monday \n", "10 913 NaN 2018-09-03 19:55:00 2018 9 Monday \n", "11 936 NaN 2018-09-03 20:19:00 2018 9 Monday \n", "12 139 NaN 2018-09-03 19:58:00 2018 9 Monday \n", "13 429 NaN 2018-09-03 20:39:00 2018 9 Monday \n", "14 469 NaN 2018-09-02 14:00:00 2018 9 Sunday \n", "15 NaN 2018-09-03 19:43:00 2018 9 Monday \n", "16 341 NaN 2018-09-03 18:52:00 2018 9 Monday \n", "17 130 NaN 2018-09-03 17:00:00 2018 9 Monday \n", "18 901 NaN 2018-09-03 19:52:00 2018 9 Monday \n", "19 181 NaN 2018-09-02 18:00:00 2018 9 Sunday \n", "20 NaN 2018-09-03 18:46:00 2018 9 Monday \n", "21 69 NaN 2018-09-03 18:30:00 2018 9 Monday \n", "22 434 NaN 2018-09-03 18:42:00 2018 9 Monday \n", "23 550 NaN 2018-09-03 18:33:00 2018 9 Monday \n", "24 911 NaN 2018-09-03 18:38:00 2018 9 Monday \n", "25 NaN 2018-09-03 18:20:00 2018 9 Monday \n", "26 172 NaN 2018-09-03 18:50:00 2018 9 Monday \n", "27 242 NaN 2018-09-03 18:34:00 2018 9 Monday \n", "28 242 NaN 2018-09-03 18:34:00 2018 9 Monday \n", "29 463 NaN 2018-09-03 18:55:00 2018 9 Monday \n", "... ... ... ... ... ... ... 
\n", "319043 171 NaN 2015-07-22 22:00:00 2015 7 Wednesday \n", "319044 171 NaN 2015-07-22 22:00:00 2015 7 Wednesday \n", "319045 97 NaN 2016-06-14 09:40:00 2016 6 Tuesday \n", "319046 97 NaN 2016-06-14 09:40:00 2016 6 Tuesday \n", "319047 324 NaN 2016-07-29 00:00:00 2016 7 Friday \n", "319048 324 NaN 2016-07-29 00:00:00 2016 7 Friday \n", "319049 318 NaN 2015-10-02 21:00:00 2015 10 Friday \n", "319050 318 NaN 2015-10-02 21:00:00 2015 10 Friday \n", "319051 318 NaN 2015-10-02 21:00:00 2015 10 Friday \n", "319052 285 NaN 2016-06-05 17:23:00 2016 6 Sunday \n", "319053 285 NaN 2016-06-05 17:23:00 2016 6 Sunday \n", "319054 77 NaN 2015-07-27 10:47:00 2015 7 Monday \n", "319055 77 NaN 2015-07-27 10:47:00 2015 7 Monday \n", "319056 572 NaN 2015-08-03 16:22:00 2015 8 Monday \n", "319057 304 NaN 2016-07-29 18:20:00 2016 7 Friday \n", "319058 304 NaN 2016-07-29 18:20:00 2016 7 Friday \n", "319059 355 NaN 2016-02-01 01:43:00 2016 2 Monday \n", "319060 355 NaN 2016-02-01 01:43:00 2016 2 Monday \n", "319061 458 NaN 2015-11-20 11:15:00 2015 11 Friday \n", "319062 458 NaN 2015-11-20 11:15:00 2015 11 Friday \n", "319063 111 NaN 2015-08-12 12:00:00 2015 8 Wednesday \n", "319064 111 NaN 2015-08-12 12:00:00 2015 8 Wednesday \n", "319065 111 NaN 2015-08-12 12:00:00 2015 8 Wednesday \n", "319066 912 NaN 2018-01-27 14:01:00 2018 1 Saturday \n", "319067 912 NaN 2018-01-27 14:01:00 2018 1 Saturday \n", "319068 285 NaN 2016-06-05 17:25:00 2016 6 Sunday \n", "319069 520 NaN 2015-07-09 13:38:00 2015 7 Thursday \n", "319070 520 NaN 2015-07-09 13:38:00 2015 7 Thursday \n", "319071 569 NaN 2016-05-31 19:35:00 2016 5 Tuesday \n", "319072 903 NaN 2015-06-22 00:12:00 2015 6 Monday \n", "\n", " HOUR UCR_PART STREET Lat Long \\\n", "0 13 Part One LINCOLN ST 42.357791 -71.139371 \n", "1 0 Part Two HECLA ST 42.306821 -71.060300 \n", "2 19 Part Three CAZENOVE ST 42.346589 -71.072429 \n", "3 21 Part Three NEWCOMB ST 42.334182 -71.078664 \n", "4 21 Part Three DELHI ST 42.275365 -71.090361 \n", "5 21 Part 
Three TALBOT AVE 42.290196 -71.071590 \n", "6 21 Part One NORMANDY ST 42.306072 -71.082733 \n", "7 20 Part Three LAWN ST 42.327016 -71.105551 \n", "8 20 Part One MASSACHUSETTS AVE 42.331521 -71.070853 \n", "9 20 Part Three LESLIE ST 42.295147 -71.058608 \n", "10 19 Part Three OCEAN VIEW DR 42.319579 -71.040328 \n", "11 20 Part Three DALESSIO CT 42.340115 -71.053390 \n", "12 19 Part Three MARLBOROUGH ST 42.350388 -71.087853 \n", "13 20 Part Two WOODROW AVE 42.286470 -71.087147 \n", "14 14 Part Three MULVEY ST 42.279241 -71.096674 \n", "15 19 Part Three NaN 42.352875 -71.073830 \n", "16 18 Part Three STONEHURST ST 42.305264 -71.066838 \n", "17 17 Part Three HUNTINGTON AVE 42.348577 -71.077720 \n", "18 19 Part Two HORADAN WAY 42.333717 -71.096658 \n", "19 18 Part One SHIRLEY ST 42.325695 -71.068168 \n", "20 18 Part Three WOLCOTT -1.000000 -1.000000 \n", "21 18 Part Three BEACON ST 42.355644 -71.071681 \n", "22 18 Part Three CAPEN ST 42.283402 -71.080797 \n", "23 18 Part Three WASHINGTON ST 42.275818 -71.139913 \n", "24 18 Part One ANNUNCIATION RD 42.335062 -71.093168 \n", "25 18 Part Three NaN 42.283593 -71.055657 \n", "26 18 Part Three MASSACHUSETTS AVE 42.333112 -71.072764 \n", "27 18 Part Two ANNAPOLIS ST 42.317319 -71.061509 \n", "28 18 Part Two ANNAPOLIS ST 42.317319 -71.061509 \n", "29 18 Part Two BLUE HILL AVE 42.295904 -71.087733 \n", "... ... ... ... ... ... 
\n", "319043 22 Part Three HARRISON AVE 42.335560 -71.074364 \n", "319044 22 Part One HARRISON AVE 42.335560 -71.074364 \n", "319045 9 Part One SCHOOL ST 42.357428 -71.058326 \n", "319046 9 Part Three SCHOOL ST 42.357428 -71.058326 \n", "319047 0 Part Three BOWDOIN ST 42.307038 -71.066153 \n", "319048 0 Part One BOWDOIN ST 42.307038 -71.066153 \n", "319049 21 Part Three HOMESTEAD ST 42.311277 -71.089093 \n", "319050 21 Part Two HOMESTEAD ST 42.311277 -71.089093 \n", "319051 21 Part Three HOMESTEAD ST 42.311277 -71.089093 \n", "319052 17 Part One COVENTRY ST 42.336951 -71.085748 \n", "319053 17 Part Three COVENTRY ST 42.336951 -71.085748 \n", "319054 10 Part Three BOWDOIN SQ 42.361645 -71.062299 \n", "319055 10 Part One BOWDOIN SQ 42.361645 -71.062299 \n", "319056 16 Part Three COLUMBUS AVE 42.313628 -71.095603 \n", "319057 18 Part Two SLAYTON WAY 42.321770 -71.097798 \n", "319058 18 Part Two SLAYTON WAY 42.321770 -71.097798 \n", "319059 1 Part Three GENEVA AVE NaN NaN \n", "319060 1 Part Three GENEVA AVE NaN NaN \n", "319061 11 Part One BLUE HILL AVE 42.301897 -71.085549 \n", "319062 11 Part Three BLUE HILL AVE 42.301897 -71.085549 \n", "319063 12 Part Three BOYLSTON ST 42.352312 -71.063705 \n", "319064 12 Part Two BOYLSTON ST 42.352312 -71.063705 \n", "319065 12 Part Two BOYLSTON ST 42.352312 -71.063705 \n", "319066 14 Part Two CENTRE ST 42.322838 -71.100967 \n", "319067 14 Part Three CENTRE ST 42.322838 -71.100967 \n", "319068 17 Part Three COVENTRY ST 42.336951 -71.085748 \n", "319069 13 Part One RIVER ST 42.255926 -71.123172 \n", "319070 13 Part Three RIVER ST 42.255926 -71.123172 \n", "319071 19 Part Three NEW WASHINGTON ST 42.302333 -71.111565 \n", "319072 0 Part Three WASHINGTON ST 42.333839 -71.080290 \n", "\n", " Location \n", "0 (42.35779134, -71.13937053) \n", "1 (42.30682138, -71.06030035) \n", "2 (42.34658879, -71.07242943) \n", "3 (42.33418175, -71.07866441) \n", "4 (42.27536542, -71.09036101) \n", "5 (42.29019621, -71.07159012) \n", "6 (42.30607218, 
-71.08273260) \n", "7 (42.32701648, -71.10555088) \n", "8 (42.33152148, -71.07085307) \n", "9 (42.29514664, -71.05860832) \n", "10 (42.31957856, -71.04032766) \n", "11 (42.34011469, -71.05339029) \n", "12 (42.35038760, -71.08785290) \n", "13 (42.28647012, -71.08714661) \n", "14 (42.27924052, -71.09667382) \n", "15 (42.35287456, -71.07382970) \n", "16 (42.30526428, -71.06683755) \n", "17 (42.34857652, -71.07772012) \n", "18 (42.33371742, -71.09665806) \n", "19 (42.32569490, -71.06816778) \n", "20 (-1.00000000, -1.00000000) \n", "21 (42.35564426, -71.07168077) \n", "22 (42.28340243, -71.08079740) \n", "23 (42.27581799, -71.13991259) \n", "24 (42.33506218, -71.09316781) \n", "25 (42.28359328, -71.05565683) \n", "26 (42.33311189, -71.07276370) \n", "27 (42.31731905, -71.06150882) \n", "28 (42.31731905, -71.06150882) \n", "29 (42.29590385, -71.08773294) \n", "... ... \n", "319043 (42.33555954, -71.07436364) \n", "319044 (42.33555954, -71.07436364) \n", "319045 (42.35742837, -71.05832551) \n", "319046 (42.35742837, -71.05832551) \n", "319047 (42.30703835, -71.06615319) \n", "319048 (42.30703835, -71.06615319) \n", "319049 (42.31127726, -71.08909334) \n", "319050 (42.31127726, -71.08909334) \n", "319051 (42.31127726, -71.08909334) \n", "319052 (42.33695098, -71.08574813) \n", "319053 (42.33695098, -71.08574813) \n", "319054 (42.36164502, -71.06229949) \n", "319055 (42.36164502, -71.06229949) \n", "319056 (42.31362799, -71.09560307) \n", "319057 (42.32177032, -71.09779774) \n", "319058 (42.32177032, -71.09779774) \n", "319059 (0.00000000, 0.00000000) \n", "319060 (0.00000000, 0.00000000) \n", "319061 (42.30189690, -71.08554944) \n", "319062 (42.30189690, -71.08554944) \n", "319063 (42.35231190, -71.06370510) \n", "319064 (42.35231190, -71.06370510) \n", "319065 (42.35231190, -71.06370510) \n", "319066 (42.32283759, -71.10096723) \n", "319067 (42.32283759, -71.10096723) \n", "319068 (42.33695098, -71.08574813) \n", "319069 (42.25592648, -71.12317207) \n", "319070 
(42.25592648, -71.12317207) \n", "319071 (42.30233307, -71.11156487) \n", "319072 (42.33383935, -71.08029038) \n", "\n", "[319073 rows x 17 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "pd.read_csv(url, encoding='latin1')" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "from dask import dataframe as dd\n", "df = dd.read_csv(url, encoding='latin1').reset_index()" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 5 of 5 rows / 5 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
index
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
num
\n", "
2 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
idk
\n", "
3 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 5 of 5 rows / 5 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.limit(5).ext.display()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num': {'min': 1}}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.min(\"num\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num': {'min': 1}, 'idk': {'min': 2}}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.min(\"*\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num': {'percentile': {'0.5': 6.0}}}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.percentile(\"num\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'num': {'percentile': {'0.5': 6.0}}, 'idk': {'percentile': {'0.5': 4.0}}}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.percentile(\"*\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "a = {0.25: 3.5, 0.5: 6.0, 0.75: 8.5}" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0.25: 3.5, 0.5: 6.0, 0.75: 8.5}\n" ] } ], "source": [ "print(a)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py:4382: UserWarning: Insufficient elements for `head`. 5 elements requested, only 4 elements available. 
Try passing larger `npartitions` to `head`.\n", " warnings.warn(msg.format(n, len(r)))\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " num idk\n", "6 7 3\n", "7 8 4\n", "8 9 5\n", "9 10 6\n", "[OrderedDict([('num', 7), ('idk', 3)]), OrderedDict([('num', 8), ('idk', 4)]), OrderedDict([('num', 9), ('idk', 5)]), OrderedDict([('num', 10), ('idk', 6)])]\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 4 of 4 rows / 4 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
num
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
idk
\n", "
2 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 4 of 4 rows / 4 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.select((df[\"num\"] >= 6.8) & (df[\"num\"] <= 99.3)).rows.limit(10).ext.display()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'num': {'percentile': {'0.25': 3.5, '0.5': 6.0, '0.75': 8.5}}}\n", "{'num': {'percentile': {'0.25': 3.5, '0.5': 6.0, '0.75': 8.5}}}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py:4382: UserWarning: Insufficient elements for `head`. 10 elements requested, only 1 elements available. Try passing larger `npartitions` to `head`.\n", " warnings.warn(msg.format(n, len(r)))\n" ] }, { "ename": "AttributeError", "evalue": "'DataFrame' object has no attribute 'ext'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moutliers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtukey\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"num\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mdisplay\u001b[1;34m(limit, columns, title, truncate)\u001b[0m\n\u001b[0;32m 332\u001b[0m \u001b[1;32mdef\u001b[0m 
\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtruncate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[1;31m# TODO: limit, columns, title, truncate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 334\u001b[1;33m \u001b[0mExt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtruncate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 335\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 336\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mtable\u001b[1;34m(limit, columns, title, truncate)\u001b[0m\n\u001b[0;32m 338\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 339\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0m__IPYTHON__\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moutput\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;34m\"html\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 340\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mExt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable_html\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtitle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtitle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlimit\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtruncate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtruncate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 341\u001b[0m \u001b[0mprint_html\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 342\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mtable_html\u001b[1;34m(limit, columns, title, full, truncate, count)\u001b[0m\n\u001b[0;32m 288\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 289\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 290\u001b[1;33m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 291\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 292\u001b[0m \u001b[1;31m# Load the Jinja template\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 5065\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5066\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5067\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5068\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5069\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'ext'" ] } ], "source": [ "df.outliers.tukey(\"num\").select().ext.display()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df." ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py:4382: UserWarning: Insufficient elements for `head`. 20 elements requested, only 19 elements available. Try passing larger `npartitions` to `head`.\n", " warnings.warn(msg.format(n, len(r)))\n" ] } ], "source": [ "from dask import dataframe as dd\n", "df = dd.read_csv(\"data/foo.csv\", sep=\",\").head(20)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 3 of 3 rows / 3 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 3 of 3 rows / 3 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.between(\"id\",1,5).ext.display()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "df1 = df.rows.append(df)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df1' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mNameError\u001b[0m: name 'df1' is not defined" ] } ], "source": [ "df1.ext.display()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#https://github.com/dask/dask/pull/4229#issuecomment-449123512\n" ] }, { "cell_type": "code", "execution_count": 178, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'Series' object has no attribute 'mode'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 
"\u001b[1;31mAttributeError\u001b[0m: 'Series' object has no attribute 'mode'" ] } ], "source": [ "df[\"id\"].mode().compute()" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'pd' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\",\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mNameError\u001b[0m: name 'pd' is not defined" ] } ], "source": [ "df = pd.read_csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\", sep=\",\", header=0)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py:4382: UserWarning: Insufficient elements for `head`. 20 elements requested, only 13 elements available. Try passing larger `npartitions` to `head`.\n", " warnings.warn(msg.format(n, len(r)))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
01LuisAlvarez$$%!123.0Cake10.01980/07/07never
12AndréAmpère423.0piza8.01950/07/08gonna
23NiELSBöhr//((%%551.0pizza8.01990/07/09give
34PAULdirac$521.0pizza8.01954/07/10you
45AlbertEinstein634.0pizza8.01990/07/11up
56GalileoGALiLEI672.0arepa5.01930/08/12never
67CaRLGa%%%uss323.0taco3.01970/07/13gonna
78DavidH$$$ilbert624.0taaaccoo3.01950/07/14let
89JohannesKEPLER735.0taco3.01920/04/22you
910JaMESM$$ax%%well875.0taco3.01923/03/12down
1011IsaacNewton992.0pasta9.01999/02/15never
1112Emmy%%Nöether$234.0pasta9.01993/12/08gonna
1213NaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " id firstName lastName billingId product price \\\n", "0 1 Luis Alvarez$$%! 123.0 Cake 10.0 \n", "1 2 André Ampère 423.0 piza 8.0 \n", "2 3 NiELS Böhr//((%% 551.0 pizza 8.0 \n", "3 4 PAUL dirac$ 521.0 pizza 8.0 \n", "4 5 Albert Einstein 634.0 pizza 8.0 \n", "5 6 Galileo GALiLEI 672.0 arepa 5.0 \n", "6 7 CaRL Ga%%%uss 323.0 taco 3.0 \n", "7 8 David H$$$ilbert 624.0 taaaccoo 3.0 \n", "8 9 Johannes KEPLER 735.0 taco 3.0 \n", "9 10 JaMES M$$ax%%well 875.0 taco 3.0 \n", "10 11 Isaac Newton 992.0 pasta 9.0 \n", "11 12 Emmy%% Nöether$ 234.0 pasta 9.0 \n", "12 13 NaN NaN NaN NaN NaN \n", "\n", " birth dummyCol \n", "0 1980/07/07 never \n", "1 1950/07/08 gonna \n", "2 1990/07/09 give \n", "3 1954/07/10 you \n", "4 1990/07/11 up \n", "5 1930/08/12 never \n", "6 1970/07/13 gonna \n", "7 1950/07/14 let \n", "8 1920/04/22 you \n", "9 1923/03/12 down \n", "10 1999/02/15 never \n", "11 1993/12/08 gonna \n", "12 NaN NaN " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(20)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['id',\n", " 'firstName',\n", " 'lastName',\n", " 'billingId',\n", " 'product',\n", " 'price',\n", " 'birth',\n", " 'dummyCol']" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.names()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\n" ] } ], "source": [ "df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\", sep=\",\", header=True, infer_schema='false', null_value=\"None\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 5 of 5 rows / 5 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 5 of 5 rows / 5 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.limit(5).cols.lower(\"lastName\").ext.display()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "..\\optimus\\engines\\base\\dask\\columns.py:160: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n", " scaler.transform(_df)[input_cols]\n" ] }, { "ename": "IndexError", "evalue": "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmin_max_scaler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"billingId\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\dask\\columns.py\u001b[0m in \u001b[0;36mmin_max_scaler\u001b[1;34m(self, input_cols, output_cols)\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[0m_df\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[0mscaler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 160\u001b[1;33m \u001b[0mscaler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 161\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 162\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mIndexError\u001b[0m: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices" ] } ], "source": [ "df.rows.limit(5).cols.min_max_scaler(\"billingId\").ext.display()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 13\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display(13)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
hola
\n", "
9 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 13\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " 558.9166666666666\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.cols.impute(\"billingId\",output_cols=\"hola\").ext.display(13)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.cols.impute(\"billingId\",output_cols=\"new_col\").ext.display(13)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'billingId': 1,\n", " 'id': 0,\n", " 'dummyCol': 1,\n", " 'product': 1,\n", " 'firstName': 1,\n", " 'birth': 1,\n", " 'lastName': 1,\n", " 'price': 1}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.count_na(\"*\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col_0col_1
09-2
1-3-7
206
3-18
45-5
\n", "
" ], "text/plain": [ " col_0 col_1\n", "0 9 -2\n", "1 -3 -7\n", "2 0 6\n", "3 -1 8\n", "4 5 -5" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}\n", "df = pd.DataFrame(data)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 5\n", "1 1\n", "2 1\n", "3 1\n", "4 5\n", "Name: col_0, dtype: int64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"col_0\"].clip( 1, 5)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
01LuisAlvarez$$%!123.0Cake10.01980/07/07never
12AndréAmpère423.0piza8.01950/07/08gonna
23NiELSBöhr//((%%551.0pizza8.01990/07/09give
34PAULdirac$521.0pizza8.01954/07/10you
45AlbertEinstein634.0pizza8.01990/07/11up
55GalileoGALiLEI672.0arepa5.01930/08/12never
65CaRLGa%%%uss323.0taco3.01970/07/13gonna
75DavidH$$$ilbert624.0taaaccoo3.01950/07/14let
85JohannesKEPLER735.0taco3.01920/04/22you
95JaMESM$$ax%%well875.0taco3.01923/03/12down
\n", "
" ], "text/plain": [ " id firstName lastName billingId product price birth \\\n", "0 1 Luis Alvarez$$%! 123.0 Cake 10.0 1980/07/07 \n", "1 2 André Ampère 423.0 piza 8.0 1950/07/08 \n", "2 3 NiELS Böhr//((%% 551.0 pizza 8.0 1990/07/09 \n", "3 4 PAUL dirac$ 521.0 pizza 8.0 1954/07/10 \n", "4 5 Albert Einstein 634.0 pizza 8.0 1990/07/11 \n", "5 5 Galileo GALiLEI 672.0 arepa 5.0 1930/08/12 \n", "6 5 CaRL Ga%%%uss 323.0 taco 3.0 1970/07/13 \n", "7 5 David H$$$ilbert 624.0 taaaccoo 3.0 1950/07/14 \n", "8 5 Johannes KEPLER 735.0 taco 3.0 1920/04/22 \n", "9 5 JaMES M$$ax%%well 875.0 taco 3.0 1923/03/12 \n", "\n", " dummyCol \n", "0 never \n", "1 gonna \n", "2 give \n", "3 you \n", "4 up \n", "5 never \n", "6 gonna \n", "7 let \n", "8 you \n", "9 down " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(10)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\n" ] } ], "source": [ "df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\", sep=\",\", header=True, infer_schema='false', null_value=\"None\")" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'min': {'id': 1, 'min': [('min', id 1\n", " dtype: int64)]}}" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.min(\"id\")" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'min': {'id': 1.0,\n", " 'billingId': 123.0,\n", " 'price': 3.0,\n", " 'min': [('min', id 1.0\n", " billingId 123.0\n", " price 3.0\n", " dtype: float64)]}}" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.min(\"*\")" ] }, { "cell_type": "code", "execution_count": 53, 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VALUE dict_values([0.25 4.0\n", "0.50 7.0\n", "0.75 10.0\n", "Name: id, dtype: float64, 0.25 3.00\n", "0.50 8.00\n", "0.75 8.25\n", "Name: price, dtype: float64])\n" ] }, { "ename": "AttributeError", "evalue": "'dict_values' object has no attribute 'iteritems'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miqr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"price\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\dask\\columns.py\u001b[0m in \u001b[0;36miqr\u001b[1;34m(self, columns, more, relative_error)\u001b[0m\n\u001b[0;32m 122\u001b[0m \u001b[0mcheck_column_numbers\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"*\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 123\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 124\u001b[1;33m \u001b[0mquartile\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpercentile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m0.25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0.5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0.75\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mrelative_error\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mrelative_error\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 125\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mquartile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 126\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcol_name\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\columns.py\u001b[0m in \u001b[0;36mpercentile\u001b[1;34m(self, columns, values, relative_error)\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m0.5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 160\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0magg_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunctions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpercentile_agg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 161\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 162\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmedian\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mrelative_error\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mRELATIVE_ERROR\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\columns.py\u001b[0m in \u001b[0;36magg_exprs\u001b[1;34m(self, columns, funcs, *args)\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \"\"\"\n\u001b[1;32m--> 136\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexec_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 137\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\dask\\columns.py\u001b[0m in \u001b[0;36mexec_agg\u001b[1;34m(exprs)\u001b[0m\n\u001b[0;32m 465\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0magg_name\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"percentile\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 467\u001b[1;33m \u001b[0magg_parsed\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_percentile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 468\u001b[0m 
\u001b[1;32melif\u001b[0m \u001b[0magg_name\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"hist\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 469\u001b[0m \u001b[0magg_parsed\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_hist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0magg_results\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\base\\dask\\columns.py\u001b[0m in \u001b[0;36mparse_percentile\u001b[1;34m(value)\u001b[0m\n\u001b[0;32m 444\u001b[0m \u001b[0m_result\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 445\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"VALUE\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 446\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mp_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mp_result\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miteritems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 447\u001b[0m \u001b[0m_result\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mp_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mp_result\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 448\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'dict_values' object has no attribute 'iteritems'" ] } ], "source": [ "df.cols.iqr([\"id\",\"price\"])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'min': {'id': 1.0, 'billingId': 123.0, 'price': 3.0}}" ] 
}, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.min(\"*\")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
01LuisAlvarez$$%!123.0Cake10.01980/07/07never
12AndréAmpère423.0piza8.01950/07/08gonna
23NiELSBöhr//((%%551.0pizza8.01990/07/09give
34PAULdirac$521.0pizza8.01954/07/10you
45AlbertEinstein634.0pizza8.01990/07/11up
55GalileoGALiLEI672.0arepa5.01930/08/12never
65CaRLGa%%%uss323.0taco3.01970/07/13gonna
75DavidH$$$ilbert624.0taaaccoo3.01950/07/14let
85JohannesKEPLER735.0taco3.01920/04/22you
95JaMESM$$ax%%well875.0taco3.01923/03/12down
105IsaacNewton992.0pasta9.01999/02/15never
115Emmy%%Nöether$234.0pasta9.01993/12/08gonna
\n", "
" ], "text/plain": [ " id firstName lastName billingId product price \\\n", "0 1 Luis Alvarez$$%! 123.0 Cake 10.0 \n", "1 2 André Ampère 423.0 piza 8.0 \n", "2 3 NiELS Böhr//((%% 551.0 pizza 8.0 \n", "3 4 PAUL dirac$ 521.0 pizza 8.0 \n", "4 5 Albert Einstein 634.0 pizza 8.0 \n", "5 5 Galileo GALiLEI 672.0 arepa 5.0 \n", "6 5 CaRL Ga%%%uss 323.0 taco 3.0 \n", "7 5 David H$$$ilbert 624.0 taaaccoo 3.0 \n", "8 5 Johannes KEPLER 735.0 taco 3.0 \n", "9 5 JaMES M$$ax%%well 875.0 taco 3.0 \n", "10 5 Isaac Newton 992.0 pasta 9.0 \n", "11 5 Emmy%% Nöether$ 234.0 pasta 9.0 \n", "\n", " birth dummyCol \n", "0 1980/07/07 never \n", "1 1950/07/08 gonna \n", "2 1990/07/09 give \n", "3 1954/07/10 you \n", "4 1990/07/11 up \n", "5 1930/08/12 never \n", "6 1970/07/13 gonna \n", "7 1950/07/14 let \n", "8 1920/04/22 you \n", "9 1923/03/12 down \n", "10 1999/02/15 never \n", "11 1993/12/08 gonna " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(12)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
01LuisAlvarez$$%!123.0Cake10.01980/07/07never
12AndréAmpère423.0piza8.01950/07/08gonna
23NiELSBöhr//((%%551.0pizza8.01990/07/09give
34PAULdirac$521.0pizza8.01954/07/10you
45AlbertEinstein634.0pizza8.01990/07/11up
55GalileoGALiLEI672.0arepa5.01930/08/12never
65CaRLGa%%%uss323.0taco3.01970/07/13gonna
75DavidH$$$ilbert624.0taaaccoo3.01950/07/14let
85JohannesKEPLER735.0taco3.01920/04/22you
95JaMESM$$ax%%well875.0taco3.01923/03/12down
\n", "
" ], "text/plain": [ " id firstName lastName billingId product price birth \\\n", "0 1 Luis Alvarez$$%! 123.0 Cake 10.0 1980/07/07 \n", "1 2 André Ampère 423.0 piza 8.0 1950/07/08 \n", "2 3 NiELS Böhr//((%% 551.0 pizza 8.0 1990/07/09 \n", "3 4 PAUL dirac$ 521.0 pizza 8.0 1954/07/10 \n", "4 5 Albert Einstein 634.0 pizza 8.0 1990/07/11 \n", "5 5 Galileo GALiLEI 672.0 arepa 5.0 1930/08/12 \n", "6 5 CaRL Ga%%%uss 323.0 taco 3.0 1970/07/13 \n", "7 5 David H$$$ilbert 624.0 taaaccoo 3.0 1950/07/14 \n", "8 5 Johannes KEPLER 735.0 taco 3.0 1920/04/22 \n", "9 5 JaMES M$$ax%%well 875.0 taco 3.0 1923/03/12 \n", "\n", " dummyCol \n", "0 never \n", "1 gonna \n", "2 give \n", "3 you \n", "4 up \n", "5 never \n", "6 gonna \n", "7 let \n", "8 you \n", "9 down " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.clip(\"id\",1,5).head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.cols.qcult()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 13\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " 558.9166666666666\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display(13)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.DataFrame([[np.nan, 'dogs', 3]], index=df.index)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " billingId price\n", "0 0.000000 1.000000\n", "1 0.345224 0.714286\n", "2 0.492520 0.714286\n", "3 0.457998 0.714286\n", "4 0.588032 0.714286\n" ] } ], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "import dask.dataframe as dd\n", "import dask.array as da\n", "\n", "scaler = MinMaxScaler()\n", "columns = ['billingId','price']\n", "b =df[columns]\n", "scaler.fit(b)\n", "c = dd.from_dask_array(da.from_array(scaler.transform(b), chunks=100),columns)\n", "\n", "print(c.head())\n", "# df.assign(e=c['price'])\n", "\n", "\n", "# # print(dd.from_dask_array(c, columns).head(10))\n", "# df[columns]= dd.from_dask_array(c)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0. 1. ]\n", " [0.3452244 0.71428571]\n", " [0.49252014 0.71428571]\n", " [0.4579977 0.71428571]\n", " [0.58803222 0.71428571]\n", " [0.63176064 0.28571429]\n", " [0.2301496 0. ]\n", " [0.57652474 0. ]\n", " [0.70425777 0. ]\n", " [0.86536249 0. ]\n", " [1. 0.85714286]\n", " [0.12773303 0.85714286]\n", " [ nan nan]]\n" ] } ], "source": [ "print(df1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.ext.display()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 6 of 6 rows / 6 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 6 of 6 rows / 6 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.sample(5).ext.display()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "..\\optimus\\engines\\dask\\extension.py:156: UserWarning: `meta` is not specified, inferred from partial data. Please provide `meta` if the result is unexpected.\n", " Before: .apply(func)\n", " After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n", " or: .apply(func, meta=('x', 'f8')) for series result\n", " df_ = df.groupby(col_name).apply(lambda x: x.sample(2))\n", "distributed.worker - WARNING - Compute Failed\n", "Function: subgraph_callable\n", "args: ()\n", "kwargs: {}\n", "Exception: ValueError(\"Cannot take a larger sample than population when 'replace=False'\")\n", "\n" ] }, { "ename": "ValueError", "evalue": "Cannot take a larger sample than population when 'replace=False'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\groupby\\groupby.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[0;32m 688\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 689\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_python_apply_general\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 690\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\groupby\\groupby.py\u001b[0m in \u001b[0;36m_python_apply_general\u001b[1;34m(self, f)\u001b[0m\n\u001b[0;32m 706\u001b[0m keys, values, mutated = self.grouper.apply(f, self._selected_obj,\n\u001b[1;32m--> 707\u001b[1;33m self.axis)\n\u001b[0m\u001b[0;32m 708\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\groupby\\ops.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, f, data, axis)\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[0mgroup_axes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_axes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 190\u001b[1;33m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 191\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0m_is_indexed_like\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgroup_axes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36m\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 155\u001b[0m \u001b[0mn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 156\u001b[1;33m \u001b[0mdf\u001b[0m 
\u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 157\u001b[0m \u001b[1;31m# df_.index = df_.index.droplevel(0)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36msample\u001b[1;34m(self, n, frac, replace, weights, random_state, axis)\u001b[0m\n\u001b[0;32m 4864\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4865\u001b[1;33m \u001b[0mlocs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis_length\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreplace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mp\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4866\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlocs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mis_copy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mmtrand.pyx\u001b[0m in 
\u001b[0;36mmtrand.RandomState.choice\u001b[1;34m()\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: Cannot take a larger sample than population when 'replace=False'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstratified_sample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"firstName\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mdisplay\u001b[1;34m(limit, columns, title, truncate)\u001b[0m\n\u001b[0;32m 328\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtruncate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[1;31m# TODO: limit, columns, title, truncate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 330\u001b[1;33m \u001b[0mExt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mtruncate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 331\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 332\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mtable\u001b[1;34m(limit, columns, title, truncate)\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 335\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0m__IPYTHON__\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moutput\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;34m\"html\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 336\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mExt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable_html\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtitle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtitle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlimit\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtruncate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtruncate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 337\u001b[0m \u001b[0mprint_html\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 338\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mtable_html\u001b[1;34m(limit, columns, title, full, truncate, count)\u001b[0m\n\u001b[0;32m 
286\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 287\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 288\u001b[1;33m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 289\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 290\u001b[0m \u001b[1;31m# Load the Jinja template\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36mto_dict\u001b[1;34m()\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[1;31m# Because asDict can return messed columns names we order\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 68\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrow\u001b[0m \u001b[1;32min\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 69\u001b[0m \u001b[1;31m# _row = row.asDict()\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcollections\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOrderedDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py\u001b[0m in \u001b[0;36miterrows\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 2872\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0miterrows\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2873\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnpartitions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2874\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_partition\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2875\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2876\u001b[0m \u001b[1;32myield\u001b[0m 
\u001b[0mrow\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\base.py\u001b[0m in \u001b[0;36mcompute\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[0mdask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 155\u001b[0m \"\"\"\n\u001b[1;32m--> 156\u001b[1;33m \u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcompute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraverse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 157\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\base.py\u001b[0m in \u001b[0;36mcompute\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 395\u001b[0m \u001b[0mkeys\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__dask_keys__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 396\u001b[0m \u001b[0mpostcomputes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__dask_postcompute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m 
\u001b[0mcollections\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 397\u001b[1;33m \u001b[0mresults\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mschedule\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 398\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mrepack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpostcomputes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 399\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)\u001b[0m\n\u001b[0;32m 2319\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2320\u001b[0m results = self.gather(packed, asynchronous=asynchronous,\n\u001b[1;32m-> 2321\u001b[1;33m direct=direct)\n\u001b[0m\u001b[0;32m 2322\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2323\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32min\u001b[0m 
\u001b[0mfutures\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36mgather\u001b[1;34m(self, futures, errors, maxsize, direct, asynchronous)\u001b[0m\n\u001b[0;32m 1653\u001b[0m return self.sync(self._gather, futures, errors=errors,\n\u001b[0;32m 1654\u001b[0m \u001b[0mdirect\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdirect\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlocal_worker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlocal_worker\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1655\u001b[1;33m asynchronous=asynchronous)\n\u001b[0m\u001b[0;32m 1656\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1657\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mgen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoroutine\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36msync\u001b[1;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[0;32m 671\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 672\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 673\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msync\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 674\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 675\u001b[0m \u001b[1;32mdef\u001b[0m 
\u001b[0m__repr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\utils.py\u001b[0m in \u001b[0;36msync\u001b[1;34m(loop, func, *args, **kwargs)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0merror\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 691\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 692\u001b[0m \u001b[1;32mraise\u001b[0m 
\u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 693\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 694\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 695\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\utils.py\u001b[0m in \u001b[0;36mf\u001b[1;34m()\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 261\u001b[0m \u001b[0mfuture\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_timeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimedelta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseconds\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 262\u001b[1;33m \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32myield\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 263\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 264\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\tornado\\gen.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1132\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1133\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1134\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1135\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhad_exception\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\tornado\\gen.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1139\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mexc_info\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1140\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1141\u001b[1;33m \u001b[0myielded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mthrow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mexc_info\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1142\u001b[0m 
\u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1143\u001b[0m \u001b[1;31m# Break up a reference to itself\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36m_gather\u001b[1;34m(self, futures, errors, direct, local_worker)\u001b[0m\n\u001b[0;32m 1498\u001b[0m six.reraise(type(exception),\n\u001b[0;32m 1499\u001b[0m \u001b[0mexception\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1500\u001b[1;33m traceback)\n\u001b[0m\u001b[0;32m 1501\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'skip'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1502\u001b[0m \u001b[0mbad_keys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 690\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtp\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 691\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 692\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 693\u001b[0m \u001b[1;32mraise\u001b[0m 
\u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 694\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\optimization.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 940\u001b[0m % (len(self.inkeys), len(args)))\n\u001b[0;32m 941\u001b[0m return _get_recursive(self.dsk, self.outkey,\n\u001b[1;32m--> 942\u001b[1;33m dict(zip(self.inkeys, args)))\n\u001b[0m\u001b[0;32m 943\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 944\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__reduce__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\core.py\u001b[0m in \u001b[0;36m_get_recursive\u001b[1;34m(dsk, x, cache)\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mhashable\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdsk\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 132\u001b[1;33m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_recursive\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdsk\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 133\u001b[0m \u001b[1;32mreturn\u001b[0m 
\u001b[0mres\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mtuple\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mcallable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# istask\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\core.py\u001b[0m in \u001b[0;36m_get_recursive\u001b[1;34m(dsk, x, cache)\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mtuple\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mcallable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# istask\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 136\u001b[1;33m \u001b[0margs2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0m_get_recursive\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[1;32min\u001b[0m 
\u001b[0margs\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 137\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\core.py\u001b[0m in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mtuple\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mcallable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# istask\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 136\u001b[1;33m \u001b[0margs2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0m_get_recursive\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[1;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 137\u001b[0m \u001b[1;32mreturn\u001b[0m 
\u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\core.py\u001b[0m in \u001b[0;36m_get_recursive\u001b[1;34m(dsk, x, cache)\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[0margs2\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0m_get_recursive\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[1;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 137\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 138\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 139\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py\u001b[0m in \u001b[0;36mapply_and_enforce\u001b[1;34m(func, args, kwargs, meta)\u001b[0m\n\u001b[0;32m 3682\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3683\u001b[0m Ensures the output has the same columns, even if 
empty.\"\"\"\n\u001b[1;32m-> 3684\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3685\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mIndex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3686\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\groupby.py\u001b[0m in \u001b[0;36m_groupby_slice_apply\u001b[1;34m(df, grouper, key, func, *args, **kwargs)\u001b[0m\n\u001b[0;32m 145\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 146\u001b[0m \u001b[0mg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mg\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 147\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mg\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 148\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 149\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\groupby\\groupby.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[0;32m 699\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 700\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0m_group_selection_context\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 701\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_python_apply_general\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 702\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 703\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\groupby\\groupby.py\u001b[0m in \u001b[0;36m_python_apply_general\u001b[1;34m(self, f)\u001b[0m\n\u001b[0;32m 705\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_python_apply_general\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 706\u001b[0m keys, values, mutated = self.grouper.apply(f, self._selected_obj,\n\u001b[1;32m--> 707\u001b[1;33m self.axis)\n\u001b[0m\u001b[0;32m 708\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 709\u001b[0m return self._wrap_applied_output(\n", 
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\groupby\\ops.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, f, data, axis)\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[1;31m# group might be modified\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[0mgroup_axes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_axes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 190\u001b[1;33m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 191\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0m_is_indexed_like\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgroup_axes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 192\u001b[0m \u001b[0mmutated\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\extension.py\u001b[0m in \u001b[0;36m\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 155\u001b[0m \u001b[0mn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 156\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 157\u001b[0m \u001b[1;31m# df_.index = df_.index.droplevel(0)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36msample\u001b[1;34m(self, n, frac, replace, weights, random_state, axis)\u001b[0m\n\u001b[0;32m 4863\u001b[0m \"provide positive value.\")\n\u001b[0;32m 4864\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4865\u001b[1;33m \u001b[0mlocs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis_length\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreplace\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mp\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4866\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlocs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mis_copy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4867\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mmtrand.pyx\u001b[0m in \u001b[0;36mmtrand.RandomState.choice\u001b[1;34m()\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: Cannot take a larger sample than population when 'replace=False'" ] } ], "source": [ "df.ext.stratified_sample(\"firstName\").ext.display()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 6 of 6 rows / 6 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 6 of 6 rows / 6 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.limit(5).ext.display()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "min() missing 1 required positional argument: 'columns'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mTypeError\u001b[0m: min() missing 1 required positional argument: 'columns'" ] } ], "source": [ "df.cols.min(\"\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "Can't instantiate abstract class Cols with abstract methods apply_by_dtypes, apply_expr, astype, boxplot, bucketizer, cell, clip, copy, correlation, count_mismatch, count_na, count_uniques, count_zeros, drop, frequency_by_group, get_meta, impute, index_to_string, iqr, is_na, keep, max_abs_scaler, min_max_scaler, move, nunique, qcut, remove, remove_accents, remove_special_chars, remove_white_spaces, replace_regex, reverse, scatter, select_by_dtypes, set, set_meta, sort, string_to_index, to_timestamp, unique, value_counts, values_to_cols, years_between, z_score", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m 
\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"billingId\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"asc\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\multipledispatch\\dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 278\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\rows.py\u001b[0m in \u001b[0;36msort\u001b[1;34m(columns, order)\u001b[0m\n\u001b[0;32m 116\u001b[0m \"\"\"\n\u001b[0;32m 117\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparse_columns\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 118\u001b[1;33m 
\u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 119\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\multipledispatch\\dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 278\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\rows.py\u001b[0m in \u001b[0;36msort\u001b[1;34m(col_sort)\u001b[0m\n\u001b[0;32m 145\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreserve\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mActions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSORT_ROW\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 146\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 147\u001b[1;33m \u001b[0mc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnames\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 148\u001b[0m \u001b[1;31m# It seems that is on posible to order rows in Dask using set_index. It only return data in ascendent way.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 149\u001b[0m \u001b[1;31m# We should fins a way to make it work desc and form multiple columns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\dask\\columns.py\u001b[0m in \u001b[0;36mcols\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 966\u001b[0m \"\"\"\n\u001b[0;32m 967\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mCols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexec_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 968\u001b[1;33m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 969\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 970\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTypeError\u001b[0m: Can't instantiate abstract class Cols with abstract methods apply_by_dtypes, apply_expr, astype, boxplot, bucketizer, cell, clip, copy, correlation, count_mismatch, count_na, count_uniques, count_zeros, drop, frequency_by_group, get_meta, impute, index_to_string, iqr, is_na, keep, max_abs_scaler, min_max_scaler, move, nunique, qcut, remove, remove_accents, remove_special_chars, remove_white_spaces, replace_regex, reverse, scatter, select_by_dtypes, set, set_meta, sort, string_to_index, to_timestamp, unique, value_counts, values_to_cols, years_between, z_score" ] } ], "source": [ "df.rows.sort(\"billingId\",\"asc\").ext.display()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'max': {'price': 10.0}}" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.max(\"price\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "df.cols.create_expr()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Dask DataFrame Structure:
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
npartitions=1
int64objectobjectfloat64objectfloat64objectobject
........................
\n", "
\n", "
Dask Name: dropna, 4 tasks
" ], "text/plain": [ "Dask DataFrame Structure:\n", " id firstName lastName billingId product price birth dummyCol\n", "npartitions=1 \n", " int64 object object float64 object float64 object object\n", " ... ... ... ... ... ... ... ...\n", "Dask Name: dropna, 4 tasks" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dropna(how='any', subset=['price'])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "any price\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 12 rows / 12 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 10 of 12 rows / 12 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.drop_na(\"price\").ext.display()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "c = df.cols.names()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\numpy\\lib\\function_base.py:3652: RuntimeWarning: Invalid value encountered in percentile\n", " interpolation=interpolation)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
01LuisAlvarez$$%!123.0Cake10.01980/07/07never
112Emmy%%Nöether$234.0pasta9.01993/12/08gonna
27CaRLGa%%%uss323.0taco3.01970/07/13gonna
32AndréAmpère423.0piza8.01950/07/08gonna
44PAULdirac$521.0pizza8.01954/07/10you
\n", "
" ], "text/plain": [ " id firstName lastName billingId product price birth dummyCol\n", "0 1 Luis Alvarez$$%! 123.0 Cake 10.0 1980/07/07 never\n", "1 12 Emmy%% Nöether$ 234.0 pasta 9.0 1993/12/08 gonna\n", "2 7 CaRL Ga%%%uss 323.0 taco 3.0 1970/07/13 gonna\n", "3 2 André Ampère 423.0 piza 8.0 1950/07/08 gonna\n", "4 4 PAUL dirac$ 521.0 pizza 8.0 1954/07/10 you" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.set_index(\"billingId\").reset_index()[c].head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 172, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
price
price
3.03.0
5.05.0
8.08.0
9.09.0
10.010.0
\n", "
" ], "text/plain": [ " price\n", "price \n", "3.0 3.0\n", "5.0 5.0\n", "8.0 8.0\n", "9.0 9.0\n", "10.0 10.0" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.mode(\"id\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 13\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display(20)" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data/foo.csv\n" ] } ], "source": [ "df1= op.load.csv(\"data/foo.csv\", sep=\",\", header=True, infer_schema='true', charset=\"ISO-8859–1\", null_value=\"None\")" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 19 of 19 rows / 19 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 13\n", " \n", "
\n", "
\n", "
\n", " \n", " Max!!!\n", " \n", "
\n", "
\n", "
\n", " \n", " Planck!!!\n", " \n", "
\n", "
\n", "
\n", " \n", " 111\n", " \n", "
\n", "
\n", "
\n", " \n", " hamburguer\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " 1994/01/04\n", " \n", "
\n", "
\n", "
\n", " \n", " run⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 14\n", " \n", "
\n", "
\n", "
\n", " \n", " Fred\n", " \n", "
\n", "
\n", "
\n", " \n", " Hoy&&&le\n", " \n", "
\n", "
\n", "
\n", " \n", " 553\n", " \n", "
\n", "
\n", "
\n", " \n", " pizzza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1997/06/27\n", " \n", "
\n", "
\n", "
\n", " \n", " around\n", " \n", "
\n", "
\n", "
\n", " \n", " 15\n", " \n", "
\n", "
\n", "
\n", " \n", " (((⋅⋅⋅Heinrich⋅)))))\n", " \n", "
\n", "
\n", "
\n", " \n", " Hertz\n", " \n", "
\n", "
\n", "
\n", " \n", " 116\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1956/11/30\n", " \n", "
\n", "
\n", "
\n", " \n", " and\n", " \n", "
\n", "
\n", "
\n", " \n", " 16\n", " \n", "
\n", "
\n", "
\n", " \n", " William\n", " \n", "
\n", "
\n", "
\n", " \n", " Gilbert###\n", " \n", "
\n", "
\n", "
\n", " \n", " 886\n", " \n", "
\n", "
\n", "
\n", " \n", " BEER\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " 1958/03/26\n", " \n", "
\n", "
\n", "
\n", " \n", " desert\n", " \n", "
\n", "
\n", "
\n", " \n", " 17\n", " \n", "
\n", "
\n", "
\n", " \n", " Marie\n", " \n", "
\n", "
\n", "
\n", " \n", " CURIE\n", " \n", "
\n", "
\n", "
\n", " \n", " 912\n", " \n", "
\n", "
\n", "
\n", " \n", " Rice\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " 2000/03/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 18\n", " \n", "
\n", "
\n", "
\n", " \n", " Arthur\n", " \n", "
\n", "
\n", "
\n", " \n", " COM%%%pton\n", " \n", "
\n", "
\n", "
\n", " \n", " 812\n", " \n", "
\n", "
\n", "
\n", " \n", " 110790\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 1899/01/01\n", " \n", "
\n", "
\n", "
\n", " \n", " #\n", " \n", "
\n", "
\n", "
\n", " \n", " 19\n", " \n", "
\n", "
\n", "
\n", " \n", " JAMES\n", " \n", "
\n", "
\n", "
\n", " \n", " Chadwick\n", " \n", "
\n", "
\n", "
\n", " \n", " 467\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 1921/05/03\n", " \n", "
\n", "
\n", "
\n", " \n", " #\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 19 of 19 rows / 19 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df1.ext.display(20)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 'int64',\n", " 'firstName': 'object',\n", " 'lastName': 'object',\n", " 'billingId': 'float64',\n", " 'product': 'object',\n", " 'price': 'float64',\n", " 'birth': 'object',\n", " 'dummyCol': 'object'}" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.dtypes()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display(20)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df.save.csv(\"data/dask/*.csv\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df.save.parquet(\"data/dask/foo.parquet\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "df.save.json(\"data/dask/*.json\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dask DataFrame Structure:\n", " id firstName lastName billingId product price birth dummyCol\n", "npartitions=1 \n", " int64 object object float64 object float64 object object\n", " ... ... ... ... ... ... ... ...\n", "Dask Name: from-delayed, 3 tasks\n" ] }, { "ename": "TypeError", "evalue": "unsupported operand type(s) for +: 'NoneType' and 'int'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_id\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dask\\rows.py\u001b[0m in \u001b[0;36mcreate_id\u001b[1;34m(column)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m 
\u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mda\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdivisions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdivisions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_dask_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'NoneType' and 'int'" ] } ], "source": [ "df.rows.create_id()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "df1 = df[(df.id > 0) & (df.id <= 7)]\n", "df2 = df1[(df.id > 0) & (df.id <= 3)]" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\utils.py:694: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", " return getattr(obj, self.method)(*args, **kwargs)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductpricebirthdummyCol
01LuisAlvarez$$%!123.0Cake10.01980/07/07never
12AndréAmpère423.0piza8.01950/07/08gonna
23NiELSBöhr//((%%551.0pizza8.01990/07/09give
\n", "
" ], "text/plain": [ " id firstName lastName billingId product price birth dummyCol\n", "0 1 Luis Alvarez$$%! 123.0 Cake 10.0 1980/07/07 never\n", "1 2 André Ampère 423.0 piza 8.0 1950/07/08 gonna\n", "2 3 NiELS Böhr//((%% 551.0 pizza 8.0 1990/07/09 give" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.compute().head()" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 1 of 1 rows / 1 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 1 of 1 rows / 1 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.rows.select((df.id ==1 ) ).ext.display()" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'NoneType' object has no attribute '_jvm'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36m_parse_datatype_string\u001b[1;34m(s)\u001b[0m\n\u001b[0;32m 845\u001b[0m \u001b[1;31m# For backwards compatibility, \"integer\", \"struct\" and etc.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 846\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfrom_ddl_datatype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 847\u001b[0m \u001b[1;32mexcept\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36mfrom_ddl_datatype\u001b[1;34m(type_str)\u001b[0m\n\u001b[0;32m 837\u001b[0m return _parse_datatype_json_string(\n\u001b[1;32m--> 838\u001b[1;33m sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json())\n\u001b[0m\u001b[0;32m 839\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute '_jvm'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36m_parse_datatype_string\u001b[1;34m(s)\u001b[0m\n\u001b[0;32m 849\u001b[0m 
\u001b[1;31m# For backwards compatibility, \"fieldname: datatype, fieldname: datatype\" case.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 850\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfrom_ddl_datatype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"struct<%s>\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 851\u001b[0m \u001b[1;32mexcept\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36mfrom_ddl_datatype\u001b[1;34m(type_str)\u001b[0m\n\u001b[0;32m 837\u001b[0m return _parse_datatype_json_string(\n\u001b[1;32m--> 838\u001b[1;33m sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json())\n\u001b[0m\u001b[0;32m 839\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute '_jvm'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect_by_dtypes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"str\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dask\\rows.py\u001b[0m in \u001b[0;36mselect_by_dtypes\u001b[1;34m(input_cols, data_type)\u001b[0m\n\u001b[0;32m 
43\u001b[0m \u001b[1;31m# self.cols.apply()\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 44\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 45\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfbdt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata_type\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 46\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\audf.py\u001b[0m in \u001b[0;36mfilter_row_by_data_type\u001b[1;34m(col_name, data_type, get_type)\u001b[0m\n\u001b[0;32m 129\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[0mcol_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mone_list_to_val\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 131\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpandas_udf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpandas_udf_func\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreturn_data_type\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\functions.py\u001b[0m in \u001b[0;36mpandas_udf\u001b[1;34m(f, returnType, functionType)\u001b[0m\n\u001b[0;32m 2304\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mfunctools\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpartial\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_create_udf\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mreturnType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreturn_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mevalType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0meval_type\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2305\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2306\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_create_udf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreturnType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreturn_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mevalType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0meval_type\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2307\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2308\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\udf.py\u001b[0m in \u001b[0;36m_create_udf\u001b[1;34m(f, returnType, evalType)\u001b[0m\n\u001b[0;32m 70\u001b[0m udf_obj = UserDefinedFunction(\n\u001b[0;32m 71\u001b[0m f, returnType=returnType, name=None, evalType=evalType, deterministic=True)\n\u001b[1;32m---> 72\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mudf_obj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_wrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 73\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\udf.py\u001b[0m in \u001b[0;36m_wrapped\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 193\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 194\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunc\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 195\u001b[1;33m \u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreturnType\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreturnType\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 196\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevalType\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevalType\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 197\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdeterministic\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdeterministic\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\udf.py\u001b[0m in \u001b[0;36mreturnType\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 117\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_returnType_placeholder\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_returnType\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 118\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 119\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_returnType_placeholder\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_parse_datatype_string\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_returnType\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 120\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 121\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevalType\u001b[0m \u001b[1;33m==\u001b[0m 
\u001b[0mPythonEvalType\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSQL_SCALAR_PANDAS_UDF\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36m_parse_datatype_string\u001b[1;34m(s)\u001b[0m\n\u001b[0;32m 850\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mfrom_ddl_datatype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"struct<%s>\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 851\u001b[0m \u001b[1;32mexcept\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 852\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 853\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 854\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36m_parse_datatype_string\u001b[1;34m(s)\u001b[0m\n\u001b[0;32m 840\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 841\u001b[0m \u001b[1;31m# DDL format, \"fieldname datatype, fieldname datatype\".\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 842\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfrom_ddl_schema\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 843\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 844\u001b[0m 
\u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\types.py\u001b[0m in \u001b[0;36mfrom_ddl_schema\u001b[1;34m(type_str)\u001b[0m\n\u001b[0;32m 832\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfrom_ddl_schema\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtype_str\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 833\u001b[0m return _parse_datatype_json_string(\n\u001b[1;32m--> 834\u001b[1;33m sc._jvm.org.apache.spark.sql.types.StructType.fromDDL(type_str).json())\n\u001b[0m\u001b[0;32m 835\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 836\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfrom_ddl_datatype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtype_str\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute '_jvm'" ] } ], "source": [ "df.rows.select_by_dtypes(\"id\", \"str\").ext.display()" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\numpy\\lib\\function_base.py:3652: RuntimeWarning: Invalid value encountered in percentile\n", " interpolation=interpolation)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirstNamelastNamebillingIdproductbirthdummyCol
price
3.07CaRLGa%%%uss323.0taco1970/07/13gonna
3.08DavidH$$$ilbert624.0taaaccoo1950/07/14let
3.09JohannesKEPLER735.0taco1920/04/22you
3.010JaMESM$$ax%%well875.0taco1923/03/12down
5.06GalileoGALiLEI672.0arepa1930/08/12never
8.02AndréAmpère423.0piza1950/07/08gonna
8.03NiELSBöhr//((%%551.0pizza1990/07/09give
8.04PAULdirac$521.0pizza1954/07/10you
8.05AlbertEinstein634.0pizza1990/07/11up
9.011IsaacNewton992.0pasta1999/02/15never
9.012Emmy%%Nöether$234.0pasta1993/12/08gonna
10.01LuisAlvarez$$%!123.0Cake1980/07/07never
NaN13NaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " id firstName lastName billingId product birth \\\n", "price \n", "3.0 7 CaRL Ga%%%uss 323.0 taco 1970/07/13 \n", "3.0 8 David H$$$ilbert 624.0 taaaccoo 1950/07/14 \n", "3.0 9 Johannes KEPLER 735.0 taco 1920/04/22 \n", "3.0 10 JaMES M$$ax%%well 875.0 taco 1923/03/12 \n", "5.0 6 Galileo GALiLEI 672.0 arepa 1930/08/12 \n", "8.0 2 André Ampère 423.0 piza 1950/07/08 \n", "8.0 3 NiELS Böhr//((%% 551.0 pizza 1990/07/09 \n", "8.0 4 PAUL dirac$ 521.0 pizza 1954/07/10 \n", "8.0 5 Albert Einstein 634.0 pizza 1990/07/11 \n", "9.0 11 Isaac Newton 992.0 pasta 1999/02/15 \n", "9.0 12 Emmy%% Nöether$ 234.0 pasta 1993/12/08 \n", "10.0 1 Luis Alvarez$$%! 123.0 Cake 1980/07/07 \n", "NaN 13 NaN NaN NaN NaN NaN \n", "\n", " dummyCol \n", "price \n", "3.0 gonna \n", "3.0 let \n", "3.0 you \n", "3.0 down \n", "5.0 never \n", "8.0 gonna \n", "8.0 give \n", "8.0 you \n", "8.0 up \n", "9.0 never \n", "9.0 gonna \n", "10.0 never \n", "NaN NaN " ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.set_index('price').compute()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 195 ms\n" ] }, { "data": { "text/plain": [ "{'min': {'billingId': 123.0}}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "df.cols.min([\"billingId\"])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 152 ms\n" ] }, { "data": { "text/plain": [ "{'max': {'billingId': 992.0}}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "df.cols.max([\"billingId\"])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['id',\n", " 'firstName',\n", " 'lastName',\n", " 'billingId',\n", " 'product',\n", " 
'price',\n", " 'birth',\n", " 'dummyCol']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.names()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'file_name': 'foo.csv', 'transformations': {'actions': {}}}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.meta.get()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "Cannot rename index.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"id1\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\multipledispatch\\dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 278\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dask\\columns.py\u001b[0m in \u001b[0;36mrename\u001b[1;34m(old_column, new_column)\u001b[0m\n\u001b[0;32m 186\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mold_column\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_column\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 188\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mCols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mold_column\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_column\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 189\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\multipledispatch\\dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mfunc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 278\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 280\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dask\\columns.py\u001b[0m in \u001b[0;36mrename\u001b[1;34m(columns_old_new, func)\u001b[0m\n\u001b[0;32m 162\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 163\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mold_col_name\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 164\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0mold_col_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 165\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 166\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmeta\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py\u001b[0m in \u001b[0;36mrename\u001b[1;34m(self, index, columns)\u001b[0m\n\u001b[0;32m 2707\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2708\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mindex\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2709\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Cannot rename index.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2710\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2711\u001b[0m \u001b[1;31m# *args here is index, columns but columns arg is already used\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: Cannot rename index." ] } ], "source": [ "df.cols.rename(\"id\", \"id1\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 12\n", " \n", "
\n", "
\n", "
\n", " \n", " Emmy%%\n", " \n", "
\n", "
\n", "
\n", " \n", " Nöether$\n", " \n", "
\n", "
\n", "
\n", " \n", " 234.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1993/12/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 13\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "
\n", " \n", " nan\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 13 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display(\"all\")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'Cols' object has no attribute 'remove_special_chars'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mremove_special_chars\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"lastName\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m: 'Cols' object has no attribute 'remove_special_chars'" ] } ], "source": [ "df.cols.remove_special_chars(\"lastName\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Send!\n" ] } ], "source": [ "df.ext.send()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 13 rows / 13 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
firstName
\n", "
2 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
lastName
\n", "
3 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
billingId
\n", "
4 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
product
\n", "
5 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
price
\n", "
6 (float64)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
birth
\n", "
7 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
dummyCol
\n", "
8 (object)
\n", "
\n", " \n", " not nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123.0\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423.0\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672.0\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875.0\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992.0\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9.0\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 10 of 13 rows / 13 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "'columns' must be 'not None', received 'None'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"lastName\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"$\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"chars\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dask\\columns.py\u001b[0m in \u001b[0;36mreplace\u001b[1;34m(input_cols, search, replace_by, search_by, output_cols)\u001b[0m\n\u001b[0;32m 354\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 355\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 356\u001b[1;33m \u001b[0mcheck_column_numbers\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"*\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 357\u001b[0m \u001b[0moutput_cols\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_output_cols\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_cols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moutput_cols\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 358\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\columns.py\u001b[0m in 
\u001b[0;36mcheck_column_numbers\u001b[1;34m(columns, number)\u001b[0m\n\u001b[0;32m 198\u001b[0m \"\"\"\n\u001b[0;32m 199\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 200\u001b[1;33m \u001b[0mRaiseIt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"not None\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 201\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 202\u001b[0m \u001b[0mcount\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\raiseit.py\u001b[0m in \u001b[0;36mvalue_error\u001b[1;34m(var, data_values)\u001b[0m\n\u001b[0;32m 76\u001b[0m type=divisor.join(map(\n\u001b[0;32m 77\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;34m\"'\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"'\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 78\u001b[1;33m data_values)), var_type=one_list_to_val(var)))\n\u001b[0m\u001b[0;32m 79\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: 'columns' must be 'not None', received 'None'" ] } ], "source": [ "df.cols.replace(\"lastName\",\"$\", \"\",\"chars\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'id': {'int64': 13}, 'firstName': {'object': 13}, 'lastName': {'object': 
13}, 'billingId': {'float64': 13}, 'product': {'object': 13}, 'price': {'float64': 13}, 'birth': {'object': 13}, 'dummyCol': {'object': 13}}\n" ] }, { "data": { "text/plain": [ "{'id': {'int': 13, 'float': 0, 'object': 0},\n", " 'firstName': {'int': 0, 'float': 0, 'object': 13},\n", " 'lastName': {'int': 0, 'float': 0, 'object': 13},\n", " 'billingId': {'int': 0, 'float': 13, 'object': 0},\n", " 'product': {'int': 0, 'float': 0, 'object': 13},\n", " 'price': {'int': 0, 'float': 13, 'object': 0},\n", " 'birth': {'int': 0, 'float': 0, 'object': 13},\n", " 'dummyCol': {'int': 0, 'float': 0, 'object': 13}}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from optimus.profiler.profiler import Profiler\n", "p = Profiler()\n", "\n", "df.cols.count_by_dtypes(\"*\")" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\io\\demo.py:91: FutureWarning: Creating a DatetimeIndex by passing range endpoints is deprecated. Use `pandas.date_range` instead.\n", " freq=partition_freq))\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\io\\demo.py:45: FutureWarning: Creating a DatetimeIndex by passing range endpoints is deprecated. 
Use `pandas.date_range` instead.\n", " index = pd.DatetimeIndex(start=start, end=end, freq=freq, name='timestamp')\n" ] } ], "source": [ "import dask\n", "import dask.datasets\n", "import numpy as np\n", "import time\n", "from distributed import Client\n", "\n", "client = Client()\n", "client\n", "\n", "df = dask.datasets.timeseries()\n", "df = df.repartition(npartitions=300)\n", "df = client.persist(df)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7200 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 2100 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 3600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 6600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 7800 tasks\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: y, dtype: bool\n", "Dask Name: logical_and, 9600 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... \n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "Dask Series Structure:\n", "npartitions=300\n", "2000-01-01 00:00:00 bool\n", "2000-01-01 02:24:00 ...\n", " ... 
\n", "2000-01-30 21:36:00 ...\n", "2000-01-31 00:00:00 ...\n", "Name: x, dtype: bool\n", "Dask Name: logical_and, 10800 tasks\n", "[dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, dd.Scalar, 
def random_indexer(df, max_conditions=15):
    """Build a random boolean row mask over ``df``.

    The mask starts as "index is not null" and is then AND-ed with between
    1 and ``max_conditions`` random comparisons of the form
    ``df[col] < value`` or ``df[col] > value``, where ``col`` is ``'x'`` or
    ``'y'`` and ``value`` is drawn uniformly from [-1, 1).

    Parameters
    ----------
    df : DataFrame-like
        Must expose ``.index.isnull()`` and the columns ``'x'`` and ``'y'``.
        The saved outputs of this notebook show a Dask DataFrame being used,
        in which case the returned mask is a lazy boolean Dask Series.
    max_conditions : int, default 15
        Upper bound (inclusive) on the number of comparisons combined into
        the mask. Must be >= 1, otherwise ``np.random.randint`` raises
        ``ValueError``.

    Returns
    -------
    Boolean mask aligned with ``df`` (same type as ``~df.index.isnull()``
    combined through ``np.logical_and``).

    Notes
    -----
    Draws from NumPy's global random state, so results are reproducible only
    after ``np.random.seed(...)``.
    """
    indexer = ~df.index.isnull()
    comparisons = [lambda x, y: x < y, lambda x, y: x > y]
    # randint(n) yields 0..n-1, so "+ 1" guarantees at least one condition.
    for _ in range(np.random.randint(max_conditions) + 1):
        # Keep this draw order (column, threshold, comparison): changing it
        # would change which masks a given seed produces.
        col = np.random.choice(['x', 'y'])
        value = np.random.uniform(-1, 1)
        compare = np.random.choice(comparisons)
        indexer = np.logical_and(indexer, compare(df[col], value))
    return indexer


def random_statistic(indexer, df):
    """Apply a randomly chosen reduction to a randomly chosen, masked column.

    A column is picked from ``'x'``, ``'y'`` and ``'name'``. For ``'name'``
    only the distinct-count, min and max reductions are candidates; for the
    other two columns sum and mean are candidates as well.

    Parameters
    ----------
    indexer : boolean mask
        Row selector, e.g. the value returned by ``random_indexer``.
    df : DataFrame-like
        Must provide the columns ``'x'``, ``'y'`` and ``'name'``.

    Returns
    -------
    The result of the chosen reduction applied to ``df[col][indexer]``.
    With the Dask DataFrame used in this notebook the saved output shows
    these results as lazy ``dd.Scalar`` objects.

    Notes
    -----
    Draws from NumPy's global random state. The previous debug
    ``print(indexer)`` was removed: it dumped the full Series repr on every
    call (200 times per run of this cell) and flooded the cell output.
    """
    col = np.random.choice(['x', 'y', 'name'])
    reducers = [lambda x: x.unique().size, np.min, np.max]
    if col != 'name':
        # sum/mean are only offered for the 'x' and 'y' columns.
        reducers += [np.sum, np.mean]
    reduce_fn = np.random.choice(reducers)
    return reduce_fn(df[col][indexer])


SEED = 137                # fixed seed so the same random queries are rebuilt
N_INDEXERS = 10           # number of random row masks
N_STATS_PER_INDEXER = 20  # statistics requested per mask

np.random.seed(SEED)

stats = []
for i in range(N_INDEXERS):
    ind = random_indexer(df)
    stats.extend(random_statistic(ind, df) for _ in range(N_STATS_PER_INDEXER))

# NOTE: the client.compute / client.gather calls below are still disabled, so
# the first timing only covers printing the list and the second covers nothing.
# perf_counter() is used because time.time() is too coarse for such short
# intervals (the saved output of the second timing was 0.0).
st = time.perf_counter()
print(stats)
# stat_computed = client.compute(stats)
ft = time.perf_counter()
print(ft - st)

st = time.perf_counter()
# stat_results = client.gather(stat_computed)
ft = time.perf_counter()
print(ft - st)
\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\dataframe\\core.py\u001b[0m in \u001b[0;36mhead\u001b[1;34m(self, n, npartitions, compute)\u001b[0m\n\u001b[0;32m 874\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 875\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcompute\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 876\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 877\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 878\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\base.py\u001b[0m in \u001b[0;36mcompute\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[0mdask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 155\u001b[0m \"\"\"\n\u001b[1;32m--> 156\u001b[1;33m \u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcompute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraverse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 157\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\dask\\base.py\u001b[0m in \u001b[0;36mcompute\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 395\u001b[0m \u001b[0mkeys\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__dask_keys__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 396\u001b[0m \u001b[0mpostcomputes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__dask_postcompute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 397\u001b[1;33m \u001b[0mresults\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mschedule\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 398\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mrepack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpostcomputes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 399\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)\u001b[0m\n\u001b[0;32m 2307\u001b[0m \u001b[0mretries\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mretries\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2308\u001b[0m \u001b[0muser_priority\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpriority\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2309\u001b[1;33m \u001b[0mactors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactors\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2310\u001b[0m )\n\u001b[0;32m 2311\u001b[0m \u001b[0mpacked\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpack_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkeys\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfutures\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\distributed\\client.py\u001b[0m in \u001b[0;36m_graph_to_futures\u001b[1;34m(self, dsk, keys, restrictions, loose_restrictions, priority, user_priority, resources, retries, fifo_timeout, actors)\u001b[0m\n\u001b[0;32m 2232\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2233\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfutures\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2234\u001b[1;33m \u001b[1;32mraise\u001b[0m 
\u001b[0mCancelledError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2235\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2236\u001b[0m \u001b[0mdependencies\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mget_dependencies\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdsk\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mCancelledError\u001b[0m: ('repartition-merge-39715e6237c8baf832ed85d511f135a3', 0)" ] } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'op' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcsv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\",\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer_schema\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'false'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnull_value\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"None\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 
"\u001b[1;31mNameError\u001b[0m: name 'op' is not defined" ] } ], "source": [] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "module 'numpy' has no attribute 'mode'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mstat_computed\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclient\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mstat_results\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclient\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgather\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstat_computed\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: module 'numpy' has no attribute 'mode'" ] } ], "source": [ "stat_computed = client.compute(np.mode(df[\"id\"]))\n", "stat_results = client.gather(stat_computed)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "source": [ "print(stat_results)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }