{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exercise 01\n", "\n", "The goal of this exercise is to compare the performance of our classifier (81% accuracy) to some baseline classifiers that would ignore the input data and instead make constant predictions.\n", "\n", "The online [documentation for DummyClassifier](https://scikit-learn.org/stable/modules/model_evaluation.html#dummy-estimators) gives instructions on how to use it." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "df = pd.read_csv(\n", " \"https://www.openml.org/data/get_csv/1595261/adult-census.csv\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "target_name = \"class\"\n", "target = df[target_name].to_numpy()\n", "data = df.drop(columns=[target_name, \"fnlwgt\"])\n", "numerical_columns = [\n", " c for c in data.columns if data[c].dtype.kind in [\"i\", \"f\"]]\n", "data_numeric = data[numerical_columns]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import cross_val_score\n", "from sklearn.dummy import DummyClassifier\n", "\n", "# TODO: write me!" ] } ], "metadata": { "jupytext": { "formats": "python_scripts//py:percent,notebooks//ipynb" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 2 }