{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from optimus import Optimus\n", "op = Optimus()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a Spark Dataframe" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 9 of 9 rows / 1 columns
\n", "
1 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
todo_id
\n", "
1 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", " 1\n", "
\n", " 2\n", "
\n", " 3\n", "
\n", " 4\n", "
\n", " 5\n", "
\n", " 6\n", "
\n", " 7\n", "
\n", " 8\n", "
\n", " 9\n", "
\n", "\n", "
Viewing 9 of 9 rows / 1 columns
\n", "
1 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# make some test data\n", "columns = ['todo_id']\n", "vals = [1,2,3,4,5,6,7,8,9]\n", "\n", "# create DataFrame\n", "df = op.create.df(columns,vals).repartition(1).cache()\n", "df.table()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c8890ac5987a42d0916a029cf55abdaa", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Processing...', max=9), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "import requests\n", "\n", "def func_request(params):\n", " # You can use here whatever header or auth info you need to send. \n", " # For more information see the requests library\n", " url= \"https://jsonplaceholder.typicode.com/todos/\" + str(params[\"todo_id\"])\n", "\n", " return requests.get(url)\n", "\n", "def func_response(response):\n", " # Here you can parse de response\n", " return response[\"title\"]\n", "\n", "e = op.enrich(host=\"localhost\", port=27017, db_name=\"jazz\")\n", "e.flush()\n", "df_result = e.run(df, func_request, func_response, calls= 60, period = 60, max_tries = 8)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df_result' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf_result\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mNameError\u001b[0m: name 'df_result' is not defined" ] } ], "source": [ "df_result.table()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }