{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## The instruction below automagically reloads Optimus whenever you modify the library" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Mode 2 reloads every modified module (Optimus included) before each cell is executed\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# If you are in the example folder. This is the way to find optimus\n", "import sys" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Create Optimus\n", "from optimus import Optimus" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", " You are using PySparkling of version 2.4.10, but your PySpark is of\n", " version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n" ] } ], "source": [ "op = Optimus()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Go to optimus/create.py. In 'def data_frame()' write print(\"Hello World\"). 
Now run the cell below and you should see \"Hello World\" below the cell" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", "from pyspark.sql.types import StringType, IntegerType, ArrayType\n", "\n", "df = op.create.df(\n", " [\n", " (\"words\", \"str\", True),\n", " (\"num\", \"int\", True),\n", " (\"animals\", \"str\", True),\n", " (\"thing\", StringType(), True),\n", " (\"two strings\", StringType(), True),\n", " (\"filter\", StringType(), True),\n", " (\"num 2\", \"string\", True),\n", " (\"col_array\", ArrayType(StringType()), True),\n", " (\"col_int\", ArrayType(IntegerType()), True)\n", "\n", " ]\n", " ,\n", " [\n", " (\" I like fish \", 1, \"dog\", \"housé\", \"cat-car\", \"a\", \"1\", [\"baby\", \"sorry\"], [1, 2, 3]),\n", " (\" zombies\", 2, \"cat\", \"tv\", \"dog-tv\", \"b\", \"2\", [\"baby 1\", \"sorry 1\"], [3, 4]),\n", " (\"simpsons cat lady\", 2, \"frog\", \"table\", \"eagle-tv-plus\", \"1\", \"3\", [\"baby 2\", \"sorry 2\"], [5, 6, 7]),\n", " (None, 3, \"eagle\", \"glass\", \"lion-pc\", \"c\", \"4\", [\"baby 3\", \"sorry 3\"], [7, 8])\n", " ])\n", "\n", "# df.ext.display()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", 
| \n",
" words \n",
" 1 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" num \n",
" 2 (int) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" animals \n",
" 3 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" thing \n",
" 4 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" two strings \n",
" 5 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" filter \n",
" 6 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" num 2 \n",
" 7 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" col_array \n",
" 8 (array<string>) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" col_int \n",
" 9 (array<int>) \n",
" \n",
" \n",
" nullable\n",
" \n",
" \n",
" | \n",
" \n",
"
|---|---|---|---|---|---|---|---|---|
| \n",
" \n",
" \n",
" ⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 1\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" dog\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" housé\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" cat-car\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" a\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 1\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" ['baby',⋅'sorry']\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" [1,⋅2,⋅3]\n",
" \n",
" \n",
" | \n",
" \n",
"
| \n",
" \n",
" \n",
" ⋅⋅⋅⋅zombies\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 2\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" cat\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" tv\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" dog-tv\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" b\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 2\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" ['baby⋅1',⋅'sorry⋅1']\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" [3,⋅4]\n",
" \n",
" \n",
" | \n",
" \n",
"
| \n",
" \n",
" \n",
" simpsons⋅⋅⋅cat⋅lady\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 2\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" frog\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" table\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" eagle-tv-plus\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 1\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 3\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" ['baby⋅2',⋅'sorry⋅2']\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" [5,⋅6,⋅7]\n",
" \n",
" \n",
" | \n",
" \n",
"
| \n",
" \n",
" \n",
" None\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 3\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" eagle\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" glass\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" lion-pc\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" c\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" 4\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" ['baby⋅3',⋅'sorry⋅3']\n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" \n",
" [7,⋅8]\n",
" \n",
" \n",
" | \n",
" \n",
"