{ "cells": [ { "cell_type": "markdown", "metadata": { "inputHidden": false, "outputHidden": false }, "source": [ "# Profiler performance\n", "\n", "We use the part of the instacart data that you can find here https://www.instacart.com/datasets/grocery-shopping-2017\n", "\n", "Specically order_products__prior.csv a 4 columns, 33.2 Million rows csv file.\n", "\n", "Before 2.2.10\n", "It took 355.58 seconds to process all the data set in a Windows 10, \n", "Instacart data\n", "\n", "After 2.2.10\n", "It took 78 sec. infer== False\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n", " from collections import Callable\n", "\n", " You are using PySparkling of version 2.4.10, but your PySpark is of\n", " version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n", "INFO:optimus:Operative System:Windows\n", "INFO:optimus:Just check that Spark and all necessary environments vars are present...\n", "INFO:optimus:-----\n", "INFO:optimus:SPARK_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n", "INFO:optimus:HADOOP_HOME=C:\\opt\\hadoop-2.7.7\n", "INFO:optimus:PYSPARK_PYTHON=C:\\Users\\argenisleon\\Anaconda3\\python.exe\n", "INFO:optimus:PYSPARK_DRIVER_PYTHON=jupyter\n", "INFO:optimus:PYSPARK_SUBMIT_ARGS=--jars \"file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/mysql-connector-java-8.0.16.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/ojdbc8.jar,file:///C:/Users/argenisleon/Documents/Optimus/optimus/jars/postgresql-42.2.5.jar\" --driver-class-path \"C:/Users/argenisleon/Documents/Optimus/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/mysql-connector-java-8.0.16.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/ojdbc8.jar;C:/Users/argenisleon/Documents/Optimus/optimus/jars/postgresql-42.2.5.jar\" --conf \"spark.sql.catalogImplementation=hive\" pyspark-shell\n", "INFO:optimus:JAVA_HOME=C:\\java\n", "INFO:optimus:Pyarrow Installed\n", "INFO:optimus:-----\n", "INFO:optimus:Starting or getting SparkSession and SparkContext...\n", "INFO:optimus:Spark Version:2.3.1\n", "INFO:optimus:Setting checkpoint folder local. If you are in a cluster initialize Optimus with master='your_ip' as param\n", "INFO:optimus:Deleting previous folder if exists...\n", "INFO:optimus:Creating the checkpoint directory...\n", "INFO:optimus:\n", " ____ __ _ \n", " / __ \\____ / /_(_)___ ___ __ _______\n", " / / / / __ \\/ __/ / __ `__ \\/ / / / ___/\n", " / /_/ / /_/ / /_/ / / / / / / /_/ (__ ) \n", " \\____/ .___/\\__/_/_/ /_/ /_/\\__,_/____/ \n", " /_/ \n", " \n", "INFO:optimus:Transform and Roll out...\n", "INFO:optimus:Optimus successfully imported. Have fun :).\n", "INFO:optimus:Config.ini not found\n" ] } ], "source": [ "# Create optimus\n", "from optimus import Optimus\n", "op = Optimus(master=\"local[*]\", app_name = \"optimus\" ,verbose =True, checkpoint= True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Benchmark " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df = op.load.csv(\"C:\\\\Users\\\\argenisleon\\\\Desktop\\\\order_products__prior.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 32.4 million rows / 4 columns
\n", "
8 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
order_id
\n", "
1 (int)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
product_id
\n", "
2 (int)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
add_to_cart_order
\n", "
3 (int)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
reordered
\n", "
4 (int)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
33120\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
28985\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
9327\n", "
\n", "
\n", "
3\n", "
\n", "
\n", "
0\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
45918\n", "
\n", "
\n", "
4\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
30035\n", "
\n", "
\n", "
5\n", "
\n", "
\n", "
0\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
17794\n", "
\n", "
\n", "
6\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
40141\n", "
\n", "
\n", "
7\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
1819\n", "
\n", "
\n", "
8\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
2\n", "
\n", "
\n", "
43668\n", "
\n", "
\n", "
9\n", "
\n", "
\n", "
0\n", "
\n", "
\n", "
3\n", "
\n", "
\n", "
33754\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "
1\n", "
\n", "
\n", "\n", "\n", "
Viewing 10 of 32.4 million rows / 4 columns
\n", "
8 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.table()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------+-----+\n", "|order_id|count|\n", "+--------+-----+\n", "| 1564244| 145|\n", "| 790903| 137|\n", "| 61355| 127|\n", "| 2970392| 121|\n", "| 2069920| 116|\n", "| 3308010| 115|\n", "| 2753324| 114|\n", "| 2499774| 112|\n", "| 2621625| 109|\n", "| 77151| 109|\n", "| 2136777| 108|\n", "| 1657096| 108|\n", "| 2648316| 105|\n", "| 1892299| 104|\n", "| 171934| 104|\n", "| 653887| 102|\n", "| 1384519| 102|\n", "| 1867980| 102|\n", "| 3282039| 101|\n", "| 3052353| 101|\n", "+--------+-----+\n", "only showing top 20 rows\n", "\n", "Wall time: 15.9 s\n" ] } ], "source": [ "%%time\n", "df.groupBy(\"order_id\").count().sort(\"count\",ascending=False).show()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n", "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined" ] } ], "source": [ "%%time\n", "df.cols.frequency(\"order_id\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:optimus:Procesing General Stats...\n", "INFO:optimus:general_stats() executed in 37.12 sec\n", "INFO:optimus:Using 'column_exp' to process column 'order_id' with function _cast_to\n", "INFO:optimus:cast_columns() executed in 0.01 sec\n", "INFO:optimus:Processing Frequency ...\n", "INFO:optimus:dataset_info() executed in 0.07 sec\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Wall time: 2min 43s\n" ] }, { "data": { "text/plain": [ "'{\"columns\": {\"order_id\": {\"stats\": {\"count_uniques_agg\": 3025302, \"min\": 2, \"max\": 3421083, \"stddev\": 987300.69645, \"kurtosis\": -1.19913, \"mean\": 1710748.51894, \"skewness\": -0.00049, \"sum\": 55487254019416, \"variance\": 974762665216.534, \"zeros\": 0, \"count_na\": 0, \"hist\": [{\"count\": 3239832.0, \"lower\": 2.0, \"upper\": 342110.1}, {\"count\": 3243756.0, \"lower\": 342110.1, \"upper\": 684218.2}, {\"count\": 3237942.0, \"lower\": 684218.2, \"upper\": 1026326.3}, {\"count\": 3243055.0, \"lower\": 1026326.3, \"upper\": 1368434.4}, {\"count\": 3247818.0, \"lower\": 1368434.4, \"upper\": 1710542.5}, {\"count\": 3249098.0, \"lower\": 1710542.5, \"upper\": 2052650.6}, {\"count\": 3243017.0, \"lower\": 2052650.6, \"upper\": 2394758.7}, {\"count\": 3248231.0, \"lower\": 2394758.7, \"upper\": 2736866.8}, {\"count\": 3241887.0, \"lower\": 2736866.8, \"upper\": 3078974.9}, {\"count\": 3239843.0, \"lower\": 3078974.9, \"upper\": 3421083.0}], \"percentile\": {\"0.75\": 2, \"0.95\": 2, \"0.05\": 2, \"0.25\": 2, \"0.5\": 2}, \"missing_count\": 0, \"p_missing\": 0.0, \"range\": 3421081, \"median\": 2, \"interquartile_range\": 0, \"coef_variation\": 0.57712, \"mad\": 0}, \"frequency\": [{\"value\": 1564244, \"count\": 145}, {\"value\": 790903, \"count\": 137}, {\"value\": 61355, \"count\": 127}, {\"value\": 2970392, \"count\": 121}, {\"value\": 2069920, \"count\": 116}, {\"value\": 3308010, \"count\": 115}, {\"value\": 2753324, \"count\": 114}, {\"value\": 2499774, \"count\": 112}, {\"value\": 77151, \"count\": 109}, {\"value\": 2621625, \"count\": 109}], \"name\": \"order_id\", \"column_dtype\": \"int\", \"dtypes_stats\": {\"string\": 0, \"bool\": 0, \"int\": 32434489, \"float\": 0, \"double\": 0, \"date\": 0, \"array\": 0, \"null\": 0, \"missing\": 0}, \"column_type\": \"numeric\"}}, \"name\": null, \"rows_count\": \"32.4 million\", \"count_types\": {\"numeric\": 1, \"categorical\": 0, \"null\": 0, \"array\": 0, \"date\": 0, \"bool\": 0}, \"size\": \"262.3 MB\", \"summary\": {\"cols_count\": 4, \"rows_count\": 32434489, \"missing_count\": \"0.0%\", \"size\": \"262.1 MB\"}, \"sample\": {\"columns\": [{\"title\": \"order_id\"}, {\"title\": \"product_id\"}, {\"title\": \"add_to_cart_order\"}, {\"title\": \"reordered\"}], \"value\": [[934811], [1149543], [1156809], [1320736], [1343186], [1565712], [1968204], [2450020], [2508067], [2830472]]}}'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "op.profiler.to_json(df, \"order_id\", infer=False, relative_error=1)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "a = df.limit(10) 1:46 2:25" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "'int' object is not iterable", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 49\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 50\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlog_time\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\dataframe\\columns.py\u001b[0m in \u001b[0;36mfrequency\u001b[1;34m(columns, n)\u001b[0m\n\u001b[0;32m 1546\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1547\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mfreq\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcollect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1548\u001b[1;33m \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;34m\"value\"\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mkv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"count\"\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mkv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m}\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1549\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1550\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTypeError\u001b[0m: 'int' object is not iterable" ] } ], "source": [ "%%time\n", "df.cols.frequency(\"order_id\")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:optimus:Config.ini not found\n", "INFO:optimus:general_stats() executed in 23.99 sec\n", "INFO:optimus:Processing column 'order_id'...\n", "INFO:optimus:Using 'column_exp' to process column 'order_id' with function _cast_to\n", "INFO:optimus:cast_columns() executed in 0.02 sec\n", "INFO:optimus:------------------------------\n", "INFO:optimus:Processing column 'order_id'...\n", "INFO:optimus:frequency() executed in 1.39 sec\n", "INFO:optimus:dataset_info() executed in 0.08 sec\n" ] }, { "ename": "TemplateNotFound", "evalue": "/out/general_info.html", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTemplateNotFound\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0moptimus\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"order_id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mtimed\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtimed\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 29\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"{name}() executed in {time} sec\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0m_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count)\u001b[0m\n\u001b[0;32m 199\u001b[0m \u001b[0mtemplate_env\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mjinja2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mEnvironment\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtemplate_loader\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mautoescape\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 201\u001b[1;33m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 202\u001b[0m \u001b[1;31m# Render template\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[1;31m# Create the profiler info header\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\environment.py\u001b[0m in \u001b[0;36mget_template\u001b[1;34m(self, name, parent, globals)\u001b[0m\n\u001b[0;32m 828\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mparent\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[0mname\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin_path\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 830\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_load_template\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_globals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mglobals\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 831\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 832\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0minternalcode\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\environment.py\u001b[0m in \u001b[0;36m_load_template\u001b[1;34m(self, name, globals)\u001b[0m\n\u001b[0;32m 802\u001b[0m template.is_up_to_date):\n\u001b[0;32m 803\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mtemplate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 804\u001b[1;33m \u001b[0mtemplate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mglobals\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 805\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcache\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 806\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtemplate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\loaders.py\u001b[0m in \u001b[0;36mload\u001b[1;34m(self, environment, name, globals)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[1;31m# first we try to get the source for this template together\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 112\u001b[0m \u001b[1;31m# with the filename and the uptodate function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 113\u001b[1;33m \u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0muptodate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_source\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0menvironment\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 114\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 115\u001b[0m \u001b[1;31m# try to load the code from the bytecode cache if there is a\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\loaders.py\u001b[0m in \u001b[0;36mget_source\u001b[1;34m(self, environment, template)\u001b[0m\n\u001b[0;32m 185\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 186\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mcontents\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0muptodate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 187\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTemplateNotFound\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemplate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 188\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mlist_templates\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTemplateNotFound\u001b[0m: /out/general_info.html" ] } ], "source": [ "from optimus import Profiler\n", "p = Profiler()\n", "p.run(a,\"order_id\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:optimus:general_stats() executed in 23.64 sec\n", "INFO:optimus:Processing column 'order_id'...\n", "INFO:optimus:Using 'column_exp' to process column 'order_id' with function _cast_to\n", "INFO:optimus:cast_columns() executed in 0.02 sec\n", "INFO:optimus:------------------------------\n", "INFO:optimus:Processing column 'order_id'...\n", "INFO:optimus:frequency() executed in 1.27 sec\n", "INFO:optimus:dataset_info() executed in 0.09 sec\n" ] }, { "ename": "TemplateNotFound", "evalue": "/out/general_info.html", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTemplateNotFound\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"order_id\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mtimed\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtimed\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 29\u001b[0m \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"{name}() executed in {time} sec\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0m_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count)\u001b[0m\n\u001b[0;32m 199\u001b[0m \u001b[1;31m# Render template\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 200\u001b[0m \u001b[1;31m# Create the profiler info header\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 201\u001b[1;33m \u001b[0mhtml\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 202\u001b[0m \u001b[0mgeneral_template\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtemplate_env\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_template\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"general_info.html\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[0mhtml\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mhtml\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mgeneral_template\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moutput\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\environment.py\u001b[0m in \u001b[0;36mget_template\u001b[1;34m(self, name, parent, globals)\u001b[0m\n\u001b[0;32m 828\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mparent\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[0mname\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin_path\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 830\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_load_template\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_globals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mglobals\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 831\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 832\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0minternalcode\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\environment.py\u001b[0m in \u001b[0;36m_load_template\u001b[1;34m(self, name, globals)\u001b[0m\n\u001b[0;32m 802\u001b[0m template.is_up_to_date):\n\u001b[0;32m 803\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mtemplate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 804\u001b[1;33m \u001b[0mtemplate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mglobals\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 805\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcache\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 806\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtemplate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\loaders.py\u001b[0m in \u001b[0;36mload\u001b[1;34m(self, environment, name, globals)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[1;31m# first we try to get the source for this template together\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 112\u001b[0m \u001b[1;31m# with the filename and the uptodate function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 113\u001b[1;33m \u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0muptodate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_source\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0menvironment\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 114\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 115\u001b[0m \u001b[1;31m# try to load the code from the bytecode cache if there is a\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\jinja2\\loaders.py\u001b[0m in \u001b[0;36mget_source\u001b[1;34m(self, environment, template)\u001b[0m\n\u001b[0;32m 185\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 186\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mcontents\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0muptodate\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 187\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTemplateNotFound\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemplate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 188\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mlist_templates\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTemplateNotFound\u001b[0m: /out/general_info.html" ] } ], "source": [ "op.profiler.run(a, \"order_id\", infer=True, relative_error=1)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgroupBy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"order_id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"count\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined" ] } ], "source": [ "df.groupBy(\"order_id\").count().sort(\"count\",ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernel_info": { "name": "python3" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" }, "nteract": { "version": "0.11.6" } }, "nbformat": 4, "nbformat_minor": 2 }