{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  Exercise 01\n",
    "\n",
    "The goal of is to compare the performance of our classifier (81% accuracy) to some baseline classifiers that  would ignore the input data and instead make constant predictions.\n",
    "\n",
    "The online [documentation for DummyClassifier](https://scikit-learn.org/stable/modules/model_evaluation.html#dummy-estimators) gives instructions on how to use it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\n",
    "    \"https://www.openml.org/data/get_csv/1595261/adult-census.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_name = \"class\"\n",
    "target = df[target_name].to_numpy()\n",
    "data = df.drop(columns=[target_name, \"fnlwgt\"])\n",
    "numerical_columns = [\n",
    "    c for c in data.columns if data[c].dtype.kind in [\"i\", \"f\"]]\n",
    "data_numeric = data[numerical_columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.dummy import DummyClassifier\n",
    "\n",
    "# TODO: write me!"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "formats": "python_scripts//py:percent,notebooks//ipynb"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}