{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from rdkit import Chem\n", "from rdkit.Chem import AllChem\n", "from rdkit.Chem import DataStructs\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.model_selection import train_test_split\n", "from imblearn.over_sampling import SMOTE\n", "from imblearn.over_sampling import ADASYN\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from IPython import display\n", "from sklearn.decomposition import PCA" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read chembl_5HT.csv and keep only the rows that have no missing values.\n", "df = pd.read_csv('chembl_5HT.csv').dropna()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | standard_relation | \n", "standard_value | \n", "standard_type | \n", "pchembl_value | \n", "assay_chembl_id | \n", "target_chembl_id | \n", "canonical_smiles | \n", "compound_chembl_id | \n", "
---|---|---|---|---|---|---|---|---|
1 | \n", "= | \n", "168.0 | \n", "Ki | \n", "6.78 | \n", "CHEMBL615460 | \n", "CHEMBL214 | \n", "CCCN(CCC)[C@@H]1CCc2c(OC)cccc2[C@@H]1C | \n", "CHEMBL278751 | \n", "
2 | \n", "= | \n", "181.0 | \n", "Ki | \n", "6.74 | \n", "CHEMBL615809 | \n", "CHEMBL214 | \n", "C(N1CCN(CC1)c2ncccn2)c3c[nH]c(n3)c4ccccc4 | \n", "CHEMBL103772 | \n", "