{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom methods in `DropCorrelatedFeatures`\n", "\n", "In this tutorial, we show how to pass a custom method to `DropCorrelatedFeatures` using the association measure [Distance Correlation](https://m-clark.github.io/docs/CorrelationComparison.pdf) from the Python package [dcor](https://dcor.readthedocs.io/en/latest/index.html)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import dcor\n", "import warnings\n", "\n", "from sklearn.datasets import make_classification\n", "from feature_engine.selection import DropCorrelatedFeatures\n", "\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | var_0 | \n", "var_1 | \n", "var_2 | \n", "var_3 | \n", "var_4 | \n", "var_5 | \n", "var_6 | \n", "var_7 | \n", "var_8 | \n", "var_9 | \n", "var_10 | \n", "var_11 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-0.718421 | \n", "-0.306430 | \n", "0.477337 | \n", "1.662651 | \n", "1.621889 | \n", "-0.226039 | \n", "2.089741 | \n", "-2.145033 | \n", "2.616778 | \n", "0.074477 | \n", "1.402662 | \n", "1.599289 | \n", "
1 | \n", "0.584286 | \n", "-0.871870 | \n", "1.490290 | \n", "3.644921 | \n", "3.584239 | \n", "-0.750463 | \n", "-0.024631 | \n", "-4.525042 | \n", "5.518534 | \n", "1.788593 | \n", "3.077793 | \n", "3.188758 | \n", "
2 | \n", "-1.644619 | \n", "-0.391961 | \n", "0.891121 | \n", "2.232705 | \n", "2.175168 | \n", "-0.278656 | \n", "-1.145170 | \n", "-2.897788 | \n", "3.535246 | \n", "-0.796662 | \n", "1.883299 | \n", "2.178584 | \n", "
3 | \n", "1.795776 | \n", "-2.645368 | \n", "1.568321 | \n", "1.449491 | \n", "1.754788 | \n", "-3.226923 | \n", "0.626374 | \n", "0.238043 | \n", "-0.310298 | \n", "1.247212 | \n", "1.256478 | \n", "-2.376344 | \n", "
4 | \n", "-0.683522 | \n", "-1.420178 | \n", "-0.120177 | \n", "1.019803 | \n", "1.171396 | \n", "-1.708503 | \n", "-0.114110 | \n", "-0.223424 | \n", "0.262247 | \n", "0.322612 | \n", "0.877768 | \n", "-0.972715 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "0.379855 | \n", "-0.529128 | \n", "-0.093361 | \n", "2.668557 | \n", "2.608481 | \n", "-0.410322 | \n", "-1.343059 | \n", "-3.409712 | \n", "4.159278 | \n", "-1.287548 | \n", "2.251801 | \n", "2.507712 | \n", "
996 | \n", "0.410435 | \n", "-1.590386 | \n", "0.301589 | \n", "0.962002 | \n", "1.140932 | \n", "-1.931062 | \n", "0.010015 | \n", "0.011464 | \n", "-0.025811 | \n", "-1.124970 | \n", "0.831563 | \n", "-1.315063 | \n", "
997 | \n", "0.562542 | \n", "-0.173591 | \n", "-0.551323 | \n", "1.456996 | \n", "1.407670 | \n", "-0.077131 | \n", "-1.215225 | \n", "-1.963863 | \n", "2.396559 | \n", "1.678760 | \n", "1.227821 | \n", "1.551989 | \n", "
998 | \n", "0.187248 | \n", "-0.355866 | \n", "-1.385539 | \n", "1.304138 | \n", "1.288720 | \n", "-0.324460 | \n", "0.260543 | \n", "-1.580115 | \n", "1.926655 | \n", "-1.330030 | \n", "1.101843 | \n", "1.071300 | \n", "
999 | \n", "0.105134 | \n", "-2.982815 | \n", "0.309657 | \n", "2.085668 | \n", "2.406926 | \n", "-3.593946 | \n", "-0.339890 | \n", "-0.387522 | \n", "0.451001 | \n", "-0.221839 | \n", "1.796291 | \n", "-2.113529 | \n", "
1000 rows × 12 columns
\n", "\n", " | var_0 | \n", "var_1 | \n", "var_2 | \n", "var_3 | \n", "var_6 | \n", "var_7 | \n", "var_9 | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "-0.718421 | \n", "-0.306430 | \n", "0.477337 | \n", "1.662651 | \n", "2.089741 | \n", "-2.145033 | \n", "0.074477 | \n", "
1 | \n", "0.584286 | \n", "-0.871870 | \n", "1.490290 | \n", "3.644921 | \n", "-0.024631 | \n", "-4.525042 | \n", "1.788593 | \n", "
2 | \n", "-1.644619 | \n", "-0.391961 | \n", "0.891121 | \n", "2.232705 | \n", "-1.145170 | \n", "-2.897788 | \n", "-0.796662 | \n", "
3 | \n", "1.795776 | \n", "-2.645368 | \n", "1.568321 | \n", "1.449491 | \n", "0.626374 | \n", "0.238043 | \n", "1.247212 | \n", "
4 | \n", "-0.683522 | \n", "-1.420178 | \n", "-0.120177 | \n", "1.019803 | \n", "-0.114110 | \n", "-0.223424 | \n", "0.322612 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "0.379855 | \n", "-0.529128 | \n", "-0.093361 | \n", "2.668557 | \n", "-1.343059 | \n", "-3.409712 | \n", "-1.287548 | \n", "
996 | \n", "0.410435 | \n", "-1.590386 | \n", "0.301589 | \n", "0.962002 | \n", "0.010015 | \n", "0.011464 | \n", "-1.124970 | \n", "
997 | \n", "0.562542 | \n", "-0.173591 | \n", "-0.551323 | \n", "1.456996 | \n", "-1.215225 | \n", "-1.963863 | \n", "1.678760 | \n", "
998 | \n", "0.187248 | \n", "-0.355866 | \n", "-1.385539 | \n", "1.304138 | \n", "0.260543 | \n", "-1.580115 | \n", "-1.330030 | \n", "
999 | \n", "0.105134 | \n", "-2.982815 | \n", "0.309657 | \n", "2.085668 | \n", "-0.339890 | \n", "-0.387522 | \n", "-0.221839 | \n", "
1000 rows × 7 columns
\n", "\n", " | var_0 | \n", "var_1 | \n", "var_2 | \n", "var_3 | \n", "var_6 | \n", "var_7 | \n", "var_9 | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "-0.718421 | \n", "-0.306430 | \n", "0.477337 | \n", "1.662651 | \n", "2.089741 | \n", "-2.145033 | \n", "0.074477 | \n", "
1 | \n", "0.584286 | \n", "-0.871870 | \n", "1.490290 | \n", "3.644921 | \n", "-0.024631 | \n", "-4.525042 | \n", "1.788593 | \n", "
2 | \n", "-1.644619 | \n", "-0.391961 | \n", "0.891121 | \n", "2.232705 | \n", "-1.145170 | \n", "-2.897788 | \n", "-0.796662 | \n", "
3 | \n", "1.795776 | \n", "-2.645368 | \n", "1.568321 | \n", "1.449491 | \n", "0.626374 | \n", "0.238043 | \n", "1.247212 | \n", "
4 | \n", "-0.683522 | \n", "-1.420178 | \n", "-0.120177 | \n", "1.019803 | \n", "-0.114110 | \n", "-0.223424 | \n", "0.322612 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "0.379855 | \n", "-0.529128 | \n", "-0.093361 | \n", "2.668557 | \n", "-1.343059 | \n", "-3.409712 | \n", "-1.287548 | \n", "
996 | \n", "0.410435 | \n", "-1.590386 | \n", "0.301589 | \n", "0.962002 | \n", "0.010015 | \n", "0.011464 | \n", "-1.124970 | \n", "
997 | \n", "0.562542 | \n", "-0.173591 | \n", "-0.551323 | \n", "1.456996 | \n", "-1.215225 | \n", "-1.963863 | \n", "1.678760 | \n", "
998 | \n", "0.187248 | \n", "-0.355866 | \n", "-1.385539 | \n", "1.304138 | \n", "0.260543 | \n", "-1.580115 | \n", "-1.330030 | \n", "
999 | \n", "0.105134 | \n", "-2.982815 | \n", "0.309657 | \n", "2.085668 | \n", "-0.339890 | \n", "-0.387522 | \n", "-0.221839 | \n", "
1000 rows × 7 columns
\n", "