{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Checking `abdulelah_gani_t` fragmentation against the reference table\n",
    "\n",
    "Runs `abdulelah_gani_t.get_groups` on every SMILES in `gf.csv` and stops at the first molecule whose detected group numbers differ from the table row. The remaining cells inspect that molecule."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem import Draw\n",
    "\n",
    "from ugropy import abdulelah_gani, abdulelah_gani_t, instantiate_mol_object"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the reference table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "GANI_TABLE_PATH = \"../abdulelah_gani_frags/gf.csv\"\n",
    "\n",
    "# Column labels 351..424 (inclusive), as strings to match the CSV header.\n",
    "# NOTE(review): this selection was previously named `primary`; confirm which\n",
    "# group order these columns belong to, since the model used below is\n",
    "# `abdulelah_gani_t`.\n",
    "group_columns = np.arange(351, 425).astype(str)\n",
    "\n",
    "# Build the table in a single chain (no in-place mutation) so that re-running\n",
    "# this cell always yields the same frame.\n",
    "gani_df = (\n",
    "    pd.read_csv(GANI_TABLE_PATH, index_col=\"SMILES\", sep=\"|\", comment=\"?\")\n",
    "    .loc[:, group_columns]\n",
    "    .rename(columns=int)  # \"351\" -> 351, to compare with `subgroups_numbers`\n",
    "    .dropna()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find the first mismatching molecule"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OCCN(CCO)CCO\n",
      "172\n",
      "Subgrupos:\n",
      "{'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)': 3}\n",
      "Subgrupos por número:\n",
      "{357: 3}\n",
      "Gani: \n",
      "{}\n",
      "Gani group names:\n",
      "{}\n"
     ]
    }
   ],
   "source": [
    "# Reset on every run so the inspection cells below can never pick up a stale\n",
    "# value left in the kernel by a previous execution.\n",
    "wrong_smiles = None\n",
    "\n",
    "for idx, smiles in enumerate(gani_df.index):\n",
    "    try:\n",
    "        solution = abdulelah_gani_t.get_groups(smiles, \"smiles\")\n",
    "    except Exception:\n",
    "        # Report which molecule failed, then re-raise the original exception.\n",
    "        print(smiles, idx)\n",
    "        raise\n",
    "\n",
    "    # A duplicated SMILES label makes .loc return a DataFrame; use its first row.\n",
    "    row = gani_df.loc[smiles]\n",
    "    if isinstance(row, pd.DataFrame):\n",
    "        row = row.iloc[0]\n",
    "\n",
    "    # Keep only the groups with a non-zero count, as {group_number: count}.\n",
    "    row_dict = row[row != 0].to_dict()\n",
    "\n",
    "    if solution.subgroups_numbers == row_dict:\n",
    "        continue\n",
    "\n",
    "    # First disagreement: show the detected groups next to the table's groups.\n",
    "    print(smiles)\n",
    "    print(idx)\n",
    "    print(\"Subgrupos:\")\n",
    "    print(str(solution.subgroups))\n",
    "    print(\"Subgrupos por número:\")\n",
    "    print(str(solution.subgroups_numbers))\n",
    "    print(\"Gani: \")\n",
    "    print(row_dict)\n",
    "\n",
    "    # Translate the table's group numbers into group names via `subgroups_info`,\n",
    "    # whose index labels are taken as the names.\n",
    "    info = abdulelah_gani_t.subgroups_info\n",
    "    gani_groups = {\n",
    "        info.loc[info[\"group_number\"] == group].index[0]: ocurr\n",
    "        for group, ocurr in row_dict.items()\n",
    "    }\n",
    "\n",
    "    print(\"Gani group names:\")\n",
    "    print(gani_groups)\n",
    "\n",
    "    wrong_smiles = smiles\n",
    "    break\n",
    "else:\n",
    "    # Loop finished without `break`: every molecule agreed with the table.\n",
    "    print(\"No mismatches found.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the mismatching molecule"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)_0': (3, 2, 1, 0, 4, 5, 6),\n",
       " 'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)_1': (3, 2, 1, 0, 7, 8, 9),\n",
       " 'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)_2': (3, 4, 5, 6, 7, 8, 9)}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fail with a clear message when the search above found nothing to inspect.\n",
    "assert wrong_smiles is not None, \"No mismatching molecule was found above.\"\n",
    "\n",
    "mol = instantiate_mol_object(wrong_smiles, \"smiles\")\n",
    "\n",
    "abdulelah_gani_t.detect_fragments(mol)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n",
       "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n",
       "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n",
       "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n",
       "\n",
       "OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2): 3"
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sol = abdulelah_gani_t.get_groups(wrong_smiles, \"smiles\")\n",
    "\n",
    "sol.draw(width=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "-299.3002870483604 kilojoule/mole"
      ],
      "text/latex": [
       "$-299.3002870483604\\ \\frac{\\mathrm{kilojoule}}{\\mathrm{mole}}$"
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same molecule through the `abdulelah_gani` model, to read a property from it.\n",
    "sm = abdulelah_gani.get_groups(wrong_smiles, \"smiles\")\n",
    "\n",
    "sm.ig_formation_gibbs"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ugropy",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}