{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ugropy import abdulelah_gani_t, abdulelah_gani, instantiate_mol_object\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "from rdkit.Chem import Draw\n",
    "from rdkit import Chem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labels of the columns to keep: the integers 351..424 (inclusive) as strings,\n",
    "# because the CSV header is read as text.\n",
    "# NOTE(review): presumably the group numbers covered by `abdulelah_gani_t` - confirm.\n",
    "primary = np.arange(351, 425).astype(str)\n",
    "\n",
    "# Reference table: `|`-separated and indexed by SMILES; anything after a `?` on\n",
    "# a line is ignored by the parser. Built as one chain rather than with in-place\n",
    "# edits, so every step returns a new frame.\n",
    "df = (\n",
    "    pd.read_csv(\"../abdulelah_gani_frags/gf.csv\", index_col=\"SMILES\", sep=\"|\", comment=\"?\")\n",
    "    .loc[:, primary]\n",
    "    # Numeric labels become ints so they can be compared with ugropy's group numbers.\n",
    "    .rename(columns=lambda col: int(col) if col.isdigit() else col)\n",
    "    # Discard molecules with a missing value in any of the kept columns.\n",
    "    .dropna()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OCCN(CCO)CCO\n",
      "172\n",
      "Subgrupos:\n",
      "{'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)': 3}\n",
      "Subgrupos por número:\n",
      "{357: 3}\n",
      "Gani: \n",
      "{}\n",
      "Gani group names:\n",
      "{}\n"
     ]
    }
   ],
   "source": [
    "# Compare ugropy's fragmentation with the reference table, one molecule at a\n",
    "# time, and stop at the first disagreement.\n",
    "# Reset on every run so the following cells never pick up a stale value left by\n",
    "# an earlier execution; it stays None when every molecule matches.\n",
    "wrong_smiles = None\n",
    "\n",
    "for idx, smiles in enumerate(df.index):\n",
    "    try:\n",
    "        solution = abdulelah_gani_t.get_groups(smiles, \"smiles\")\n",
    "    except Exception:\n",
    "        # Report which molecule failed, then re-raise the original exception unchanged.\n",
    "        print(smiles, idx)\n",
    "        raise\n",
    "\n",
    "    # Reference counts for this molecule. A duplicated SMILES label makes `.loc`\n",
    "    # return a DataFrame, in which case only its first row is used.\n",
    "    row = df.loc[smiles]\n",
    "    if isinstance(row, pd.DataFrame):\n",
    "        row = row.iloc[0]\n",
    "\n",
    "    # Keep only the groups with a non-zero count: {group number: count}.\n",
    "    row_dict = row[row != 0].to_dict()\n",
    "\n",
    "    # Disabled: leave groups 372 and 373 out of the comparison on both sides.\n",
    "    #row_dict.pop(372, None)\n",
    "    #row_dict.pop(373, None)\n",
    "    solution_dict = solution.subgroups_numbers.copy()\n",
    "    #solution_dict.pop(372, None)\n",
    "    #solution_dict.pop(373, None)\n",
    "\n",
    "    # First molecule whose detected groups differ from the reference: dump both and stop.\n",
    "    if solution_dict != row_dict:\n",
    "        print(smiles)\n",
    "        print(idx)\n",
    "        print(\"Subgrupos:\")\n",
    "        print(solution.subgroups)\n",
    "        print(\"Subgrupos por número:\")\n",
    "        print(solution.subgroups_numbers)\n",
    "        print(\"Gani: \")\n",
    "        print(row_dict)\n",
    "\n",
    "        # Translate the reference group numbers into group names; the names are\n",
    "        # the index labels of `subgroups_info`.\n",
    "        info = abdulelah_gani_t.subgroups_info\n",
    "        gani_groups = {}\n",
    "        for group, ocurr in row_dict.items():\n",
    "            group_name = info.loc[info[\"group_number\"] == group].index[0]\n",
    "            gani_groups[group_name] = ocurr\n",
    "\n",
    "        print(\"Gani group names:\")\n",
    "        print(gani_groups)\n",
    "\n",
    "        wrong_smiles = smiles\n",
    "        break\n",
    "else:\n",
    "    # The loop ended without `break`, i.e. no molecule disagreed.\n",
    "    print(\"No mismatches found.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)_0': (3, 2, 1, 0, 4, 5, 6),\n",
       " 'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)_1': (3, 2, 1, 0, 7, 8, 9),\n",
       " 'OH-(CHp)k-NHx-(CHn)m-OH (m,k>0; p,n,x in 0..2)_2': (3, 4, 5, 6, 7, 8, 9)}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a molecule object from the SMILES that the comparison loop stored in `wrong_smiles`.\n",
    "mol = instantiate_mol_object(wrong_smiles, \"smiles\")\n",
    "\n",
    "# Show the fragment occurrences found on that molecule. The recorded output is a dict\n",
    "# keyed by '<group name>_<n>', overlapping matches included.\n",
    "# NOTE(review): the integer tuples look like atom indices - confirm.\n",
    "abdulelah_gani_t.detect_fragments(mol)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:rdkit=\"http://www.rdkit.org/xml\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" baseProfile=\"full\" xml:space=\"preserve\" width=\"700px\" height=\"200px\" viewBox=\"0 0 700 200\">\n",
       "<!-- END OF HEADER -->\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"700.0\" height=\"200.0\" x=\"0.0\" y=\"0.0\"> </rect>\n",
       "<ellipse cx=\"479.7\" cy=\"125.4\" rx=\"18.0\" ry=\"10.0\" class=\"atom-0\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"427.1\" cy=\"110.8\" rx=\"9.7\" ry=\"9.7\" class=\"atom-1\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"391.4\" cy=\"143.5\" rx=\"9.7\" ry=\"9.7\" class=\"atom-2\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"345.2\" cy=\"129.0\" rx=\"9.7\" ry=\"9.7\" class=\"atom-3\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"309.5\" cy=\"161.7\" rx=\"9.7\" ry=\"9.7\" class=\"atom-4\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"263.2\" cy=\"147.2\" rx=\"9.7\" ry=\"9.7\" class=\"atom-5\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"220.8\" cy=\"180.0\" rx=\"18.5\" ry=\"10.0\" class=\"atom-6\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"334.6\" cy=\"81.7\" rx=\"9.7\" ry=\"9.7\" class=\"atom-7\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"288.4\" cy=\"67.1\" rx=\"9.7\" ry=\"9.7\" class=\"atom-8\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<ellipse cx=\"284.4\" cy=\"20.0\" rx=\"18.0\" ry=\"10.0\" class=\"atom-9\" style=\"fill:#1F77B4A5;fill-rule:evenodd;stroke:#1F77B4A5;stroke-width:1.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 465.4,122.9 L 446.2,116.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 446.2,116.8 L 427.1,110.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1 atom-1 atom-2\" d=\"M 427.1,110.8 L 391.4,143.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 391.4,143.5 L 371.7,137.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 371.7,137.3 L 352.0,131.1\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3 atom-3 atom-4\" d=\"M 338.4,135.2 L 323.9,148.5\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3 atom-3 atom-4\" d=\"M 323.9,148.5 L 309.5,161.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-4 atom-4 atom-5\" d=\"M 309.5,161.7 L 263.2,147.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-5 atom-5 atom-6\" d=\"M 263.2,147.2 L 249.3,159.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-5 atom-5 atom-6\" d=\"M 249.3,159.9 L 235.4,172.7\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-3 atom-7\" d=\"M 343.3,120.5 L 339.0,101.1\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-3 atom-7\" d=\"M 339.0,101.1 L 334.6,81.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-7 atom-7 atom-8\" d=\"M 334.6,81.7 L 288.4,67.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 288.4,67.1 L 284.2,47.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 284.2,47.9 L 279.9,28.6\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path d=\"M 428.0,111.1 L 427.1,110.8 L 425.3,112.4\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 393.2,141.9 L 391.4,143.5 L 390.4,143.2\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 310.2,161.1 L 309.5,161.7 L 307.1,161.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 265.6,147.9 L 263.2,147.2 L 262.5,147.8\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 334.9,82.7 L 334.6,81.7 L 332.3,81.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 290.8,67.9 L 288.4,67.1 L 288.2,66.2\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path class=\"atom-0\" d=\"M 467.0 125.4 Q 467.0 122.1, 468.6 120.2 Q 470.2 118.4, 473.3 118.4 Q 476.3 118.4, 478.0 120.2 Q 479.6 122.1, 479.6 125.4 Q 479.6 128.7, 477.9 130.6 Q 476.3 132.5, 473.3 132.5 Q 470.3 132.5, 468.6 130.6 Q 467.0 128.7, 467.0 125.4 M 473.3 130.9 Q 475.4 130.9, 476.5 129.5 Q 477.6 128.1, 477.6 125.4 Q 477.6 122.7, 476.5 121.3 Q 475.4 120.0, 473.3 120.0 Q 471.2 120.0, 470.0 121.3 Q 468.9 122.7, 468.9 125.4 Q 468.9 128.1, 470.0 129.5 Q 471.2 130.9, 473.3 130.9 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-0\" d=\"M 481.7 118.6 L 483.6 118.6 L 483.6 124.4 L 490.6 124.4 L 490.6 118.6 L 492.4 118.6 L 492.4 132.3 L 490.6 132.3 L 490.6 125.9 L 483.6 125.9 L 483.6 132.3 L 481.7 132.3 L 481.7 118.6 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-3\" d=\"M 342.1 122.1 L 346.6 129.4 Q 347.1 130.1, 347.8 131.4 Q 348.5 132.7, 348.5 132.8 L 348.5 122.1 L 350.4 122.1 L 350.4 135.8 L 348.5 135.8 L 343.7 127.9 Q 343.1 127.0, 342.5 125.9 Q 341.9 124.8, 341.7 124.5 L 341.7 135.8 L 340.0 135.8 L 340.0 122.1 L 342.1 122.1 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-6\" d=\"M 207.7 173.1 L 209.6 173.1 L 209.6 179.0 L 216.6 179.0 L 216.6 173.1 L 218.4 173.1 L 218.4 186.9 L 216.6 186.9 L 216.6 180.5 L 209.6 180.5 L 209.6 186.9 L 207.7 186.9 L 207.7 173.1 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-6\" d=\"M 221.2 180.0 Q 221.2 176.7, 222.9 174.8 Q 224.5 173.0, 227.5 173.0 Q 230.6 173.0, 232.2 174.8 Q 233.8 176.7, 233.8 180.0 Q 233.8 183.3, 232.2 185.2 Q 230.5 187.1, 227.5 187.1 Q 224.5 187.1, 222.9 185.2 Q 221.2 183.3, 221.2 180.0 M 227.5 185.5 Q 229.6 185.5, 230.8 184.1 Q 231.9 182.7, 231.9 180.0 Q 231.9 177.3, 230.8 175.9 Q 229.6 174.5, 227.5 174.5 Q 225.4 174.5, 224.3 175.9 Q 223.2 177.3, 223.2 180.0 Q 223.2 182.7, 224.3 184.1 Q 225.4 185.5, 227.5 185.5 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-9\" d=\"M 271.6 19.9 Q 271.6 16.6, 273.3 14.8 Q 274.9 12.9, 277.9 12.9 Q 281.0 12.9, 282.6 14.8 Q 284.2 16.6, 284.2 19.9 Q 284.2 23.2, 282.6 25.1 Q 280.9 27.0, 277.9 27.0 Q 274.9 27.0, 273.3 25.1 Q 271.6 23.2, 271.6 19.9 M 277.9 25.5 Q 280.0 25.5, 281.2 24.1 Q 282.3 22.6, 282.3 19.9 Q 282.3 17.2, 281.2 15.8 Q 280.0 14.5, 277.9 14.5 Q 275.8 14.5, 274.7 15.8 Q 273.6 17.2, 273.6 19.9 Q 273.6 22.7, 274.7 24.1 Q 275.8 25.5, 277.9 25.5 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-9\" d=\"M 286.4 13.1 L 288.2 13.1 L 288.2 18.9 L 295.2 18.9 L 295.2 13.1 L 297.1 13.1 L 297.1 26.8 L 295.2 26.8 L 295.2 20.5 L 288.2 20.5 L 288.2 26.8 L 286.4 26.8 L 286.4 13.1 \" fill=\"#FF0000\"/>\n",
       "<rect x=\"1\" y=\"5\" width=\"18.0\" height=\"18.0\" fill=\"rgb(31, 119, 179)\"/><text x=\"19.200000000000003\" y=\"20\" font-family=\"Helvetica\" font-size=\"12\" fill=\"black\">OH-(CHp)k-NHx-(CHn)m-OH (m,k&gt;0; p,n,x in 0..2): 3</text><text x=\"350.0\" y=\"40\" font-family=\"Helvetica\" font-size=\"12\" font-weight=\"bold\" fill=\"black\" text-anchor=\"middle\"/></svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fragment the mismatching molecule again and keep the result for inspection.\n",
    "sol = abdulelah_gani_t.get_groups(wrong_smiles, \"smiles\")\n",
    "\n",
    "# Render the molecule as a 700 px wide SVG with the assigned groups highlighted and a legend.\n",
    "sol.draw(width=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "-299.3002870483604 kilojoule/mole"
      ],
      "text/latex": [
       "$-299.3002870483604\\ \\frac{\\mathrm{kilojoule}}{\\mathrm{mole}}$"
      ],
      "text/plain": [
       "<Quantity(-299.300287, 'kilojoule / mole')>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same molecule, this time through `abdulelah_gani` instead of `abdulelah_gani_t`.\n",
    "# NOTE(review): presumably the complete model built on top of the `_t` groups - confirm.\n",
    "sm = abdulelah_gani.get_groups(wrong_smiles, \"smiles\")\n",
    "\n",
    "# Displayed as a quantity in kilojoule/mole in the recorded output.\n",
    "# NOTE(review): the name suggests the ideal-gas Gibbs energy of formation - confirm.\n",
    "sm.ig_formation_gibbs"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ugropy",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}