{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## The instruction below automagically reloads Optimus if you make any modification to the library" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Mode 2 re-imports every modified module (Optimus included) before each cell is executed\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# If you are in the example folder. This is the way to find optimus\n", "import sys" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Create Optimus\n", "from optimus import Optimus" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", " You are using PySparkling of version 2.4.10, but your PySpark is of\n", " version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n" ] } ], "source": [ "op = Optimus()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Go to optimus/create.py. In 'def data_frame()' write print(\"Hello World\"). 
Now run the cell below and you should see \"Hello World\" bellow the cell" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", "from pyspark.sql.types import StringType, IntegerType, ArrayType\n", "\n", "df = op.create.df(\n", " [\n", " (\"words\", \"str\", True),\n", " (\"num\", \"int\", True),\n", " (\"animals\", \"str\", True),\n", " (\"thing\", StringType(), True),\n", " (\"two strings\", StringType(), True),\n", " (\"filter\", StringType(), True),\n", " (\"num 2\", \"string\", True),\n", " (\"col_array\", ArrayType(StringType()), True),\n", " (\"col_int\", ArrayType(IntegerType()), True)\n", "\n", " ]\n", " ,\n", " [\n", " (\" I like fish \", 1, \"dog\", \"housé\", \"cat-car\", \"a\", \"1\", [\"baby\", \"sorry\"], [1, 2, 3]),\n", " (\" zombies\", 2, \"cat\", \"tv\", \"dog-tv\", \"b\", \"2\", [\"baby 1\", \"sorry 1\"], [3, 4]),\n", " (\"simpsons cat lady\", 2, \"frog\", \"table\", \"eagle-tv-plus\", \"1\", \"3\", [\"baby 2\", \"sorry 2\"], [5, 6, 7]),\n", " (None, 3, \"eagle\", \"glass\", \"lion-pc\", \"c\", \"4\", [\"baby 3\", \"sorry 3\"], [7, 8])\n", " ])\n", "\n", "# df.ext.display()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 4 of 4 rows / 9 columns
\n", "
8 partition(s)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
words
\n", "
1 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
num
\n", "
2 (int)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
animals
\n", "
3 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
thing
\n", "
4 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
two strings
\n", "
5 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
filter
\n", "
6 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
num 2
\n", "
7 (string)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
col_array
\n", "
8 (array<string>)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
col_int
\n", "
9 (array<int>)
\n", "
\n", " \n", " nullable\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " dog\n", " \n", "
\n", "
\n", "
\n", " \n", " housé\n", " \n", "
\n", "
\n", "
\n", " \n", " cat-car\n", " \n", "
\n", "
\n", "
\n", " \n", " a\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " ['baby',⋅'sorry']\n", " \n", "
\n", "
\n", "
\n", " \n", " [1,⋅2,⋅3]\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅zombies\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " cat\n", " \n", "
\n", "
\n", "
\n", " \n", " tv\n", " \n", "
\n", "
\n", "
\n", " \n", " dog-tv\n", " \n", "
\n", "
\n", "
\n", " \n", " b\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " ['baby⋅1',⋅'sorry⋅1']\n", " \n", "
\n", "
\n", "
\n", " \n", " [3,⋅4]\n", " \n", "
\n", "
\n", "
\n", " \n", " simpsons⋅⋅⋅cat⋅lady\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " frog\n", " \n", "
\n", "
\n", "
\n", " \n", " table\n", " \n", "
\n", "
\n", "
\n", " \n", " eagle-tv-plus\n", " \n", "
\n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " ['baby⋅2',⋅'sorry⋅2']\n", " \n", "
\n", "
\n", "
\n", " \n", " [5,⋅6,⋅7]\n", " \n", "
\n", "
\n", "
\n", " \n", " None\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " eagle\n", " \n", "
\n", "
\n", "
\n", " \n", " glass\n", " \n", "
\n", "
\n", "
\n", " \n", " lion-pc\n", " \n", "
\n", "
\n", "
\n", " \n", " c\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " ['baby⋅3',⋅'sorry⋅3']\n", " \n", "
\n", "
\n", "
\n", " \n", " [7,⋅8]\n", " \n", "
\n", "
\n", "\n", "\n", "
Viewing 4 of 4 rows / 9 columns
\n", "
8 partition(s)
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.ext.display()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "jupytext": { "formats": "ipynb,py" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }