{ "cells": [ { "cell_type": "markdown", "id": "bffcdfc4", "metadata": {}, "source": [ "# Explore Pandera\n", "\n", "https://pandera.readthedocs.io/en/stable/index.html" ] }, { "cell_type": "code", "execution_count": 1, "id": "395e847b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
column1column2column3
01-1.3value_1
14-1.4value_2
20-2.9value_3
310-10.1value_2
49-20.4value_1
\n", "
" ], "text/plain": [ " column1 column2 column3\n", "0 1 -1.3 value_1\n", "1 4 -1.4 value_2\n", "2 0 -2.9 value_3\n", "3 10 -10.1 value_2\n", "4 9 -20.4 value_1" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# data to validate\n", "df = pd.DataFrame({\n", " \"column1\": [1, 4, 0, 10, 9],\n", " \"column2\": [-1.3, -1.4, -2.9, -10.1, -20.4],\n", " \"column3\": [\"value_1\", \"value_2\", \"value_3\", \"value_2\", \"value_1\"],\n", "})\n", "\n", "df" ] }, { "cell_type": "markdown", "id": "4f28fc0f", "metadata": {}, "source": [ "## \"Quick\" API" ] }, { "cell_type": "code", "execution_count": 4, "id": "758ccad6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
column1column2column3
01-1.3value_1
14-1.4value_2
20-2.9value_3
310-10.1value_2
49-20.4value_1
\n", "
" ], "text/plain": [ " column1 column2 column3\n", "0 1 -1.3 value_1\n", "1 4 -1.4 value_2\n", "2 0 -2.9 value_3\n", "3 10 -10.1 value_2\n", "4 9 -20.4 value_1" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "schema = pa.DataFrameSchema({\n", " \"column1\": pa.Column(int, checks=pa.Check.le(10)),\n", " \"column2\": pa.Column(float, checks=pa.Check.lt(-1.2)),\n", " \"column3\": pa.Column(str, checks=[\n", " pa.Check.str_startswith(\"value_\"),\n", " # define custom checks as functions that take a series as input and\n", " # outputs a boolean or boolean Series\n", " pa.Check(lambda s: s.str.split(\"_\", expand=True).shape[1] == 2)\n", " ]),\n", "})\n", "\n", "schema(df)" ] }, { "cell_type": "markdown", "id": "b5d413b9", "metadata": {}, "source": [ "## OO API" ] }, { "cell_type": "code", "execution_count": 6, "id": "31cd5770", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
column1column2column3
01-1.3value_1
14-1.4value_2
20-2.9value_3
310-10.1value_2
49-20.4value_1
\n", "
" ], "text/plain": [ " column1 column2 column3\n", "0 1 -1.3 value_1\n", "1 4 -1.4 value_2\n", "2 0 -2.9 value_3\n", "3 10 -10.1 value_2\n", "4 9 -20.4 value_1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandera as pa\n", "from pandera.typing import Series\n", "\n", "class Schema(pa.DataFrameModel):\n", "\n", " column1: Series[int] = pa.Field(le=10)\n", " column2: Series[float] = pa.Field(lt=-1.2)\n", " column3: Series[str] = pa.Field(str_startswith=\"value_\")\n", "\n", " @pa.check(\"column3\")\n", " def column_3_check(cls, series: Series[str]) -> Series[bool]:\n", " \"\"\"Check that column3 values have two elements after being split with '_'\"\"\"\n", " return series.str.split(\"_\", expand=True).shape[1] == 2\n", "\n", "Schema.validate(df) " ] }, { "cell_type": "markdown", "id": "e71cec17", "metadata": {}, "source": [ "## Load a LAS file" ] }, { "cell_type": "code", "execution_count": null, "id": "fb1e83dd", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "4893f41f", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c2dc0058", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "py310", "language": "python", "name": "py310" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }