{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from optimus import Optimus" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "..\\optimus\\engines\\base\\constants.py:25: DeprecationWarning: `np.str` is a deprecated alias for the builtin `str`. To silence this warning, use `str` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.str_` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " DTYPES_DICT = {\"string\": np.str, \"uint8\": np.uint8, \"uint16\": np.uint16, \"uint32\": np.uint32,\n", "..\\optimus\\engines\\base\\constants.py:27: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " \"float\": np.float, \"float64\": np.float64, \"boolean\": np.bool, \"array\": np.array,\n", "..\\optimus\\engines\\base\\constants.py:27: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " \"float\": np.float, \"float64\": np.float64, \"boolean\": np.bool, \"array\": np.array,\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\statsmodels\\iolib\\foreign.py:651: DeprecationWarning: `np.long` is a deprecated alias for `np.compat.long`. To silence this warning, use `np.compat.long` by itself. In the likely event your code does not need to work on Python 2 you can use the builtin `int` for which `np.compat.long` is itself an alias. Doing this will not modify any behaviour and is safe. When replacing `np.long`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " _type_converters = {253 : np.long, 252 : int}\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\patsy\\constraint.py:13: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working\n", " from collections import Mapping\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\statsmodels\\stats\\_lilliefors.py:163: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " size = np.array(sorted(cv_data), dtype=np.float)\n", "WARNING:root:'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. Koalas will set it for you but it does not work if there is a Spark context already launched.\n", "\n", " You are using PySparkling of version 2.4.10, but your PySpark is of\n", " version 3.1.1. Please make sure Spark and PySparkling versions are compatible. \n" ] } ], "source": [ "op = Optimus(\"spark\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data/foo.csv\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Converting `np.character` to a dtype is deprecated. The current result is `np.dtype(np.str_)` which is not strictly correct. Note that `np.character` is generally deprecated and 'S1' should be used.\n", "`np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", "`np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", "`np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n" ] } ], "source": [ "df = op.load.csv(\"data/foo.csv\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import os" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DataFrame.toPandas is deprecated as of DataFrame.to_pandas. Please use the API instead.\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 19 rows / 8 columns
\n", "
1 partition(s)
\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (int32)
\n", " \n", "
\n", "
firstName
\n", "
2 (object)
\n", " \n", "
\n", "
lastName
\n", "
3 (object)
\n", " \n", "
\n", "
billingId
\n", "
4 (int32)
\n", " \n", "
\n", "
product
\n", "
5 (object)
\n", " \n", "
\n", "
price
\n", "
6 (int32)
\n", " \n", "
\n", "
birth
\n", "
7 (object)
\n", " \n", "
\n", "
dummyCol
\n", "
8 (object)
\n", " \n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", "
\n", "\n", "
Viewing 10 of 19 rows / 8 columns
\n", "
1 partition(s) <class 'optimus.engines.spark.dataframe.SparkDataFrame'>
\n", "\n" ], "text/plain": [ " id firstName lastName billingId product price birth dummyCol\n", " (int32) (object) (object) (int32) (object) (int32) (object) (object)\n", "--------- ----------- ----------- ----------- ---------- --------- ---------- ----------\n", " 1 Luis Alvarez$$%! 123 Cake 10 1980/07/07 never\n", " 2 André Ampère 423 piza 8 1950/07/08 gonna\n", " 3 NiELS Böhr//((%% 551 pizza 8 1990/07/09 give\n", " 4 PAUL dirac$ 521 pizza 8 1954/07/10 you\n", " 5 Albert Einstein 634 pizza 8 1990/07/11 up\n", " 6 Galileo GALiLEI 672 arepa 5 1930/08/12 never\n", " 7 CaRL Ga%%%uss 323 taco 3 1970/07/13 gonna\n", " 8 David H$$$ilbert 624 taaaccoo 3 1950/07/14 let\n", " 9 Johannes KEPLER 735 taco 3 1920/04/22 you\n", " 10 JaMES M$$ax%%well 875 taco 3 1923/03/12 down\n", " 11 Isaac Newton 992 pasta 9 1999/02/15 never" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 19 rows / 8 columns
\n", "
1 partition(s)
\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (object)
\n", " \n", "
\n", "
firstName
\n", "
2 (object)
\n", " \n", "
\n", "
lastName
\n", "
3 (object)
\n", " \n", "
\n", "
billingId
\n", "
4 (object)
\n", " \n", "
\n", "
product
\n", "
5 (object)
\n", " \n", "
\n", "
price
\n", "
6 (object)
\n", " \n", "
\n", "
birth
\n", "
7 (object)
\n", " \n", "
\n", "
dummyCol
\n", "
8 (object)
\n", " \n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " luis\n", " \n", "
\n", "
\n", "
\n", " \n", " alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123\n", " \n", "
\n", "
\n", "
\n", " \n", " cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " andré\n", " \n", "
\n", "
\n", "
\n", " \n", " ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " niels\n", " \n", "
\n", "
\n", "
\n", " \n", " böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " paul\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " albert\n", " \n", "
\n", "
\n", "
\n", " \n", " einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅galilei\n", " \n", "
\n", "
\n", "
\n", " \n", " 672\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " carl\n", " \n", "
\n", "
\n", "
\n", " \n", " ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " david\n", " \n", "
\n", "
\n", "
\n", " \n", " h$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " kepler\n", " \n", "
\n", "
\n", "
\n", " \n", " 735\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " james\n", " \n", "
\n", "
\n", "
\n", " \n", " m$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", "
\n", "\n", "
Viewing 10 of 19 rows / 8 columns
\n", "
1 partition(s) <class 'optimus.engines.spark.dataframe.SparkDataFrame'>
\n", "\n" ], "text/plain": [ " id firstName lastName billingId product price birth dummyCol\n", " (object) (object) (object) (object) (object) (object) (object) (object)\n", "---------- ----------- ----------- ----------- ---------- ---------- ---------- ----------\n", " 1 luis alvarez$$%! 123 cake 10 1980/07/07 never\n", " 2 andré ampère 423 piza 8 1950/07/08 gonna\n", " 3 niels böhr//((%% 551 pizza 8 1990/07/09 give\n", " 4 paul dirac$ 521 pizza 8 1954/07/10 you\n", " 5 albert einstein 634 pizza 8 1990/07/11 up\n", " 6 galileo galilei 672 arepa 5 1930/08/12 never\n", " 7 carl ga%%%uss 323 taco 3 1970/07/13 gonna\n", " 8 david h$$$ilbert 624 taaaccoo 3 1950/07/14 let\n", " 9 johannes kepler 735 taco 3 1920/04/22 you\n", " 10 james m$$ax%%well 875 taco 3 1923/03/12 down\n", " 11 isaac newton 992 pasta 9 1999/02/15 never" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.lower()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 19 rows / 8 columns
\n", "
1 partition(s)
\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
id
\n", "
1 (object)
\n", " \n", "
\n", "
firstName
\n", "
2 (object)
\n", " \n", "
\n", "
lastName
\n", "
3 (object)
\n", " \n", "
\n", "
billingId
\n", "
4 (object)
\n", " \n", "
\n", "
product
\n", "
5 (object)
\n", " \n", "
\n", "
price
\n", "
6 (object)
\n", " \n", "
\n", "
birth
\n", "
7 (object)
\n", " \n", "
\n", "
dummyCol
\n", "
8 (object)
\n", " \n", "
\n", "
\n", " \n", " 1\n", " \n", "
\n", "
\n", "
\n", " \n", " LUIS\n", " \n", "
\n", "
\n", "
\n", " \n", " ALVAREZ$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123\n", " \n", "
\n", "
\n", "
\n", " \n", " CAKE\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " NEVER\n", " \n", "
\n", "
\n", "
\n", " \n", " 2\n", " \n", "
\n", "
\n", "
\n", " \n", " ANDRÉ\n", " \n", "
\n", "
\n", "
\n", " \n", " AMPÈRE\n", " \n", "
\n", "
\n", "
\n", " \n", " 423\n", " \n", "
\n", "
\n", "
\n", " \n", " PIZA\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " GONNA\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " NIELS\n", " \n", "
\n", "
\n", "
\n", " \n", " BÖHR//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551\n", " \n", "
\n", "
\n", "
\n", " \n", " PIZZA\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " GIVE\n", " \n", "
\n", "
\n", "
\n", " \n", " 4\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " DIRAC$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521\n", " \n", "
\n", "
\n", "
\n", " \n", " PIZZA\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " YOU\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " ALBERT\n", " \n", "
\n", "
\n", "
\n", " \n", " EINSTEIN\n", " \n", "
\n", "
\n", "
\n", " \n", " 634\n", " \n", "
\n", "
\n", "
\n", " \n", " PIZZA\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " UP\n", " \n", "
\n", "
\n", "
\n", " \n", " 6\n", " \n", "
\n", "
\n", "
\n", " \n", " GALILEO\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALILEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672\n", " \n", "
\n", "
\n", "
\n", " \n", " AREPA\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " NEVER\n", " \n", "
\n", "
\n", "
\n", " \n", " 7\n", " \n", "
\n", "
\n", "
\n", " \n", " CARL\n", " \n", "
\n", "
\n", "
\n", " \n", " GA%%%USS\n", " \n", "
\n", "
\n", "
\n", " \n", " 323\n", " \n", "
\n", "
\n", "
\n", " \n", " TACO\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " GONNA\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " DAVID\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ILBERT\n", " \n", "
\n", "
\n", "
\n", " \n", " 624\n", " \n", "
\n", "
\n", "
\n", " \n", " TAAACCOO\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " LET\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " JOHANNES\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735\n", " \n", "
\n", "
\n", "
\n", " \n", " TACO\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " YOU\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " JAMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$AX%%WELL\n", " \n", "
\n", "
\n", "
\n", " \n", " 875\n", " \n", "
\n", "
\n", "
\n", " \n", " TACO\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " DOWN\n", " \n", "
\n", "
\n", "
\n", " \n", " 11\n", " \n", "
\n", "
\n", "
\n", " \n", " ISAAC\n", " \n", "
\n", "
\n", "
\n", " \n", " NEWTON\n", " \n", "
\n", "
\n", "
\n", " \n", " 992\n", " \n", "
\n", "
\n", "
\n", " \n", " PASTA\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " NEVER⋅\n", " \n", "
\n", "
\n", "
\n", "
\n", "\n", "
Viewing 10 of 19 rows / 8 columns
\n", "
1 partition(s) <class 'optimus.engines.spark.dataframe.SparkDataFrame'>
\n", "\n" ], "text/plain": [ " id firstName lastName billingId product price birth dummyCol\n", " (object) (object) (object) (object) (object) (object) (object) (object)\n", "---------- ----------- ----------- ----------- ---------- ---------- ---------- ----------\n", " 1 LUIS ALVAREZ$$%! 123 CAKE 10 1980/07/07 NEVER\n", " 2 ANDRÉ AMPÈRE 423 PIZA 8 1950/07/08 GONNA\n", " 3 NIELS BÖHR//((%% 551 PIZZA 8 1990/07/09 GIVE\n", " 4 PAUL DIRAC$ 521 PIZZA 8 1954/07/10 YOU\n", " 5 ALBERT EINSTEIN 634 PIZZA 8 1990/07/11 UP\n", " 6 GALILEO GALILEI 672 AREPA 5 1930/08/12 NEVER\n", " 7 CARL GA%%%USS 323 TACO 3 1970/07/13 GONNA\n", " 8 DAVID H$$$ILBERT 624 TAAACCOO 3 1950/07/14 LET\n", " 9 JOHANNES KEPLER 735 TACO 3 1920/04/22 YOU\n", " 10 JAMES M$$AX%%WELL 875 TACO 3 1923/03/12 DOWN\n", " 11 ISAAC NEWTON 992 PASTA 9 1999/02/15 NEVER" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.upper()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10.0" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.std(\"id\")\n", "df.cols.mean(\"id\")\n", "df.cols.kurtosis(\"id\")\n", "df.cols.median(\"id\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'self' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreverse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"id\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\spark\\columns.py\u001b[0m in \u001b[0;36mreverse\u001b[1;34m(columns)\u001b[0m\n\u001b[0;32m 641\u001b[0m \"\"\"\n\u001b[0;32m 642\u001b[0m \u001b[1;31m# TODO: make this in one pass.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 643\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mroot\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 644\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 645\u001b[0m columns = parse_columns(\n", "\u001b[1;31mNameError\u001b[0m: name 'self' is not defined" ] } ], "source": [ "df.cols.abs(\"id\")\n", "df.cols.reverse(\"id\")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.var([\"id\"])\n", "df.cols.skew([\"id\"])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
Viewing 10 of 19 rows / 7 columns
\n", "
1 partition(s)
\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
firstName
\n", "
1 (object)
\n", " \n", "
\n", "
lastName
\n", "
2 (object)
\n", " \n", "
\n", "
billingId
\n", "
3 (int32)
\n", " \n", "
\n", "
product
\n", "
4 (object)
\n", " \n", "
\n", "
price
\n", "
5 (int32)
\n", " \n", "
\n", "
birth
\n", "
6 (object)
\n", " \n", "
\n", "
dummyCol
\n", "
7 (object)
\n", " \n", "
\n", "
\n", " \n", " Luis\n", " \n", "
\n", "
\n", "
\n", " \n", " Alvarez$$%!\n", " \n", "
\n", "
\n", "
\n", " \n", " 123\n", " \n", "
\n", "
\n", "
\n", " \n", " Cake\n", " \n", "
\n", "
\n", "
\n", " \n", " 10\n", " \n", "
\n", "
\n", "
\n", " \n", " 1980/07/07\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " André\n", " \n", "
\n", "
\n", "
\n", " \n", " Ampère\n", " \n", "
\n", "
\n", "
\n", " \n", " 423\n", " \n", "
\n", "
\n", "
\n", " \n", " piza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/08\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " NiELS\n", " \n", "
\n", "
\n", "
\n", " \n", " Böhr//((%%\n", " \n", "
\n", "
\n", "
\n", " \n", " 551\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/09\n", " \n", "
\n", "
\n", "
\n", " \n", " give\n", " \n", "
\n", "
\n", "
\n", " \n", " PAUL\n", " \n", "
\n", "
\n", "
\n", " \n", " dirac$\n", " \n", "
\n", "
\n", "
\n", " \n", " 521\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1954/07/10\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " Albert\n", " \n", "
\n", "
\n", "
\n", " \n", " Einstein\n", " \n", "
\n", "
\n", "
\n", " \n", " 634\n", " \n", "
\n", "
\n", "
\n", " \n", " pizza\n", " \n", "
\n", "
\n", "
\n", " \n", " 8\n", " \n", "
\n", "
\n", "
\n", " \n", " 1990/07/11\n", " \n", "
\n", "
\n", "
\n", " \n", " up\n", " \n", "
\n", "
\n", "
\n", " \n", " Galileo\n", " \n", "
\n", "
\n", "
\n", " \n", " ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI\n", " \n", "
\n", "
\n", "
\n", " \n", " 672\n", " \n", "
\n", "
\n", "
\n", " \n", " arepa\n", " \n", "
\n", "
\n", "
\n", " \n", " 5\n", " \n", "
\n", "
\n", "
\n", " \n", " 1930/08/12\n", " \n", "
\n", "
\n", "
\n", " \n", " never\n", " \n", "
\n", "
\n", "
\n", " \n", " CaRL\n", " \n", "
\n", "
\n", "
\n", " \n", " Ga%%%uss\n", " \n", "
\n", "
\n", "
\n", " \n", " 323\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1970/07/13\n", " \n", "
\n", "
\n", "
\n", " \n", " gonna\n", " \n", "
\n", "
\n", "
\n", " \n", " David\n", " \n", "
\n", "
\n", "
\n", " \n", " H$$$ilbert\n", " \n", "
\n", "
\n", "
\n", " \n", " 624\n", " \n", "
\n", "
\n", "
\n", " \n", " taaaccoo\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1950/07/14\n", " \n", "
\n", "
\n", "
\n", " \n", " let\n", " \n", "
\n", "
\n", "
\n", " \n", " Johannes\n", " \n", "
\n", "
\n", "
\n", " \n", " KEPLER\n", " \n", "
\n", "
\n", "
\n", " \n", " 735\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1920/04/22\n", " \n", "
\n", "
\n", "
\n", " \n", " you\n", " \n", "
\n", "
\n", "
\n", " \n", " JaMES\n", " \n", "
\n", "
\n", "
\n", " \n", " M$$ax%%well\n", " \n", "
\n", "
\n", "
\n", " \n", " 875\n", " \n", "
\n", "
\n", "
\n", " \n", " taco\n", " \n", "
\n", "
\n", "
\n", " \n", " 3\n", " \n", "
\n", "
\n", "
\n", " \n", " 1923/03/12\n", " \n", "
\n", "
\n", "
\n", " \n", " down\n", " \n", "
\n", "
\n", "
\n", " \n", " Isaac\n", " \n", "
\n", "
\n", "
\n", " \n", " Newton\n", " \n", "
\n", "
\n", "
\n", " \n", " 992\n", " \n", "
\n", "
\n", "
\n", " \n", " pasta\n", " \n", "
\n", "
\n", "
\n", " \n", " 9\n", " \n", "
\n", "
\n", "
\n", " \n", " 1999/02/15\n", " \n", "
\n", "
\n", "
\n", " \n", " never⋅\n", " \n", "
\n", "
\n", "
\n", "
\n", "\n", "
Viewing 10 of 19 rows / 7 columns
\n", "
1 partition(s) <class 'optimus.engines.spark.dataframe.SparkDataFrame'>
\n", "\n" ], "text/plain": [ "firstName lastName billingId product price birth dummyCol\n", "(object) (object) (int32) (object) (int32) (object) (object)\n", "----------- ----------- ----------- ---------- --------- ---------- ----------\n", "Luis Alvarez$$%! 123 Cake 10 1980/07/07 never\n", "André Ampère 423 piza 8 1950/07/08 gonna\n", "NiELS Böhr//((%% 551 pizza 8 1990/07/09 give\n", "PAUL dirac$ 521 pizza 8 1954/07/10 you\n", "Albert Einstein 634 pizza 8 1990/07/11 up\n", "Galileo GALiLEI 672 arepa 5 1930/08/12 never\n", "CaRL Ga%%%uss 323 taco 3 1970/07/13 gonna\n", "David H$$$ilbert 624 taaaccoo 3 1950/07/14 let\n", "Johannes KEPLER 735 taco 3 1920/04/22 you\n", "JaMES M$$ax%%well 875 taco 3 1923/03/12 down\n", "Isaac Newton 992 pasta 9 1999/02/15 never" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.rename(\"id\",\"id1\")\n", "df.cols.drop(\"id\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['id',\n", " 'firstName',\n", " 'lastName',\n", " 'billingId',\n", " 'product',\n", " 'price',\n", " 'birth',\n", " 'dummyCol']" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.names()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 19,\n", " 'firstName': 19,\n", " 'lastName': 19,\n", " 'billingId': 19,\n", " 'product': 13,\n", " 'price': 8,\n", " 'birth': 19,\n", " 'dummyCol': 13}" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cols.count_uniques()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }