{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append(\"..\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from optimus import Optimus" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\statsmodels\\iolib\\foreign.py:651: DeprecationWarning: `np.long` is a deprecated alias for `np.compat.long`. To silence this warning, use `np.compat.long` by itself. In the likely event your code does not need to work on Python 2 you can use the builtin `int` for which `np.compat.long` is itself an alias. Doing this will not modify any behaviour and is safe. When replacing `np.long`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " _type_converters = {253 : np.long, 252 : int}\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\patsy\\constraint.py:13: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working\n", " from collections import Mapping\n", "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\statsmodels\\stats\\_lilliefors.py:163: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " size = np.array(sorted(cv_data), dtype=np.float)\n", "\n", " You are using PySparkling of version 2.4.10, but your PySpark is of\n", " version 3.1.1. Please make sure Spark and PySparkling versions are compatible. \n" ] } ], "source": [ "op = Optimus(\"spark\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "Can't instantiate abstract class SparkDataFrame with abstract methods _base_to_dfd, encoding, visualize", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcsv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"data/foo.csv\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\Documents\\Optimus\\optimus\\engines\\spark\\io\\load.py\u001b[0m in \u001b[0;36mcsv\u001b[1;34m(path, sep, header, infer_schema, encoding, null_value, n_rows, error_bad_lines, *args, **kwargs)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[0msdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlimit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mn_rows\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 104\u001b[0m \u001b[1;31m# print(type(sdf))\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 105\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSparkDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 106\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmeta\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMeta\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"file_name\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfile_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 107\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mIOError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTypeError\u001b[0m: Can't instantiate abstract class SparkDataFrame with abstract methods _base_to_dfd, encoding, visualize" ] } ], "source": [ "op.load.csv(\"data/foo.csv\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }