{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "In this notebook, we will explore how to use whylogs from Python in a streaming and distributed manner.\n", "\n", "## Loading the dataset\n", "\n", "To simulate streaming data, we will load the data into a pandas DataFrame. Then, we will iterate over each `Row` object, which is a dictionary object.\n", "\n", "The `whylogs.DatasetProfile.track` method accepts a dictionary of `[feature_name, value]` pairs." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import datetime\n", "import os.path\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "member_id | \n", "loan_amnt | \n", "funded_amnt | \n", "funded_amnt_inv | \n", "term | \n", "int_rate | \n", "installment | \n", "grade | \n", "sub_grade | \n", "... | \n", "hardship_payoff_balance_amount | \n", "hardship_last_payment_amount | \n", "disbursement_method | \n", "debt_settlement_flag | \n", "debt_settlement_flag_date | \n", "settlement_status | \n", "settlement_date | \n", "settlement_amount | \n", "settlement_percentage | \n", "settlement_term | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10599 | \n", "96596008 | \n", "NaN | \n", "15000.0 | \n", "15000.0 | \n", "15000.0 | \n", "36 months | \n", "15.99 | \n", "527.29 | \n", "C | \n", "C5 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10601 | \n", "96703051 | \n", "NaN | \n", "14575.0 | \n", "14575.0 | \n", "14575.0 | \n", "36 months | \n", "25.49 | \n", "583.29 | \n", "E | \n", "E4 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10602 | \n", "96960509 | \n", "NaN | \n", "5000.0 | \n", "5000.0 | \n", "5000.0 | \n", "36 months | \n", "8.24 | \n", "157.24 | \n", "B | \n", "B1 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10603 | \n", "97463966 | \n", "NaN | \n", "13200.0 | \n", "13200.0 | \n", "13200.0 | \n", "60 months | \n", "13.99 | \n", "307.08 | \n", "C | \n", "C3 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10605 | \n", "96841832 | \n", "NaN | \n", "9500.0 | \n", "9500.0 | \n", "9500.0 | \n", "36 months | \n", "8.24 | \n", "298.75 | \n", "B | \n", "B1 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
10914 | \n", "95617334 | \n", "NaN | \n", "6500.0 | \n", "6500.0 | \n", "6250.0 | \n", "36 months | \n", "5.32 | \n", "195.75 | \n", "A | \n", "A1 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10915 | \n", "95129874 | \n", "NaN | \n", "15000.0 | \n", "15000.0 | \n", "15000.0 | \n", "60 months | \n", "15.99 | \n", "364.70 | \n", "C | \n", "C5 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10916 | \n", "96187258 | \n", "NaN | \n", "40000.0 | \n", "40000.0 | \n", "40000.0 | \n", "36 months | \n", "7.49 | \n", "1244.07 | \n", "A | \n", "A4 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10917 | \n", "94469381 | \n", "NaN | \n", "5050.0 | \n", "5050.0 | \n", "5050.0 | \n", "36 months | \n", "21.49 | \n", "191.54 | \n", "D | \n", "D5 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10918 | \n", "94480548 | \n", "NaN | \n", "7350.0 | \n", "7350.0 | \n", "7350.0 | \n", "36 months | \n", "12.74 | \n", "246.74 | \n", "C | \n", "C1 | \n", "... | \n", "NaN | \n", "NaN | \n", "Cash | \n", "N | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
309 rows × 150 columns
\n", "