{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from fastai.text import * \n", "import os\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/home/ubuntu/Austin_endoscopy'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the directory the notebook is running from.\n", "os.getcwd()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# File name of the Excel workbook that is read into the DataFrame below.\n", "filestem = 'Total_ECE_Copy.xlsx'" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# The working directory is used both to locate the workbook and as the\n", "# `path` argument handed to the fastai data bunches.\n", "path = os.getcwd()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Entire Colon ExaminedAdvanced To
01the terminal ileum
11the terminal ileum
21the terminal ileum
31the terminal ileum
41the terminal ileum
51the caecum, identified by appendiceal orifice ...
61the caecum, identified by appendiceal orifice ...
71the terminal ileum
81the caecum, identified by appendiceal orifice ...
91the caecum, identified by appendiceal orifice ...
101the caecum, identified by appendiceal orifice ...
111the terminal ileum
121the terminal ileum
131the caecum, identified by appendiceal orifice ...
141the terminal ileum
151the caecum, identified by appendiceal orifice ...
161the caecum, identified by appendiceal orifice ...
171the terminal ileum
181the caecum, identified by its appearance
191the terminal ileum
201the ileocaecal valve
211the terminal ileum
221the terminal ileum
231the terminal ileum
241the terminal ileum
251the terminal ileum
261the caecum, identified by appendiceal orifice ...
271the terminal ileum
281the terminal ileum
291the terminal ileum
.........
26781the terminal ileum
2679120 cm into the ileum
26801the terminal ileum
26811the terminal ileum
26821the caecum, identified by appendiceal orifice ...
26831the terminal ileum
26841the terminal ileum
26851the terminal ileum
26861the terminal ileum
26871the caecum, identified by appendiceal orifice ...
26881the terminal ileum, with identification of the...
26891the terminal ileum, with identification of the...
26901the caecum, identified by appendiceal orifice ...
26911the terminal ileum, with identification of the...
26921the caecum, identified by appendiceal orifice ...
26931the terminal ileum
26941Caecum
26951the terminal ileum, with identification of the...
26960the ileocolonic anastomosis
26971the terminal ileum
26981the caecum, identified by appendiceal orifice ...
26991the terminal ileum
27001the terminal ileum, with identification of the...
27011the terminal ileum
27021the terminal ileum, with identification of the...
27031the terminal ileum
27040the ileocolonic anastomosis
27051the terminal ileum
27061the caecum, identified by the appendiceal orifice
27070the ascending colon
\n", "

2708 rows × 2 columns

\n", "
" ], "text/plain": [ " Entire Colon Examined Advanced To\n", "0 1 the terminal ileum\n", "1 1 the terminal ileum\n", "2 1 the terminal ileum\n", "3 1 the terminal ileum\n", "4 1 the terminal ileum\n", "5 1 the caecum, identified by appendiceal orifice ...\n", "6 1 the caecum, identified by appendiceal orifice ...\n", "7 1 the terminal ileum\n", "8 1 the caecum, identified by appendiceal orifice ...\n", "9 1 the caecum, identified by appendiceal orifice ...\n", "10 1 the caecum, identified by appendiceal orifice ...\n", "11 1 the terminal ileum\n", "12 1 the terminal ileum\n", "13 1 the caecum, identified by appendiceal orifice ...\n", "14 1 the terminal ileum\n", "15 1 the caecum, identified by appendiceal orifice ...\n", "16 1 the caecum, identified by appendiceal orifice ...\n", "17 1 the terminal ileum\n", "18 1 the caecum, identified by its appearance\n", "19 1 the terminal ileum\n", "20 1 the ileocaecal valve\n", "21 1 the terminal ileum\n", "22 1 the terminal ileum\n", "23 1 the terminal ileum\n", "24 1 the terminal ileum\n", "25 1 the terminal ileum\n", "26 1 the caecum, identified by appendiceal orifice ...\n", "27 1 the terminal ileum\n", "28 1 the terminal ileum\n", "29 1 the terminal ileum\n", "... ... 
...\n", "2678 1 the terminal ileum\n", "2679 1 20 cm into the ileum\n", "2680 1 the terminal ileum\n", "2681 1 the terminal ileum\n", "2682 1 the caecum, identified by appendiceal orifice ...\n", "2683 1 the terminal ileum\n", "2684 1 the terminal ileum\n", "2685 1 the terminal ileum\n", "2686 1 the terminal ileum\n", "2687 1 the caecum, identified by appendiceal orifice ...\n", "2688 1 the terminal ileum, with identification of the...\n", "2689 1 the terminal ileum, with identification of the...\n", "2690 1 the caecum, identified by appendiceal orifice ...\n", "2691 1 the terminal ileum, with identification of the...\n", "2692 1 the caecum, identified by appendiceal orifice ...\n", "2693 1 the terminal ileum\n", "2694 1 Caecum\n", "2695 1 the terminal ileum, with identification of the...\n", "2696 0 the ileocolonic anastomosis\n", "2697 1 the terminal ileum\n", "2698 1 the caecum, identified by appendiceal orifice ...\n", "2699 1 the terminal ileum\n", "2700 1 the terminal ileum, with identification of the...\n", "2701 1 the terminal ileum\n", "2702 1 the terminal ileum, with identification of the...\n", "2703 1 the terminal ileum\n", "2704 0 the ileocolonic anastomosis\n", "2705 1 the terminal ileum\n", "2706 1 the caecum, identified by the appendiceal orifice\n", "2707 0 the ascending colon\n", "\n", "[2708 rows x 2 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the second worksheet of the workbook (sheet_name is a 0-based index).\n", "df = pd.read_excel(os.path.join(path, filestem), sheet_name=1)\n", "# Keep only the label and the free-text column, label first: the fastai\n", "# from_df constructors used below default to label_cols=0 and text_cols=1,\n", "# so this column order is what tells them which column is which.\n", "columnsTitles = [\"Entire Colon Examined\", \"Advanced To\"]\n", "df = df.reindex(columns=columnsTitles)\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Random ~80/20 train/validation split. A dedicated, seeded generator makes\n", "# the split identical on every run (and on every re-run of this cell)\n", "# without touching NumPy's global random state.\n", "SPLIT_SEED = 42\n", "msk = np.random.RandomState(SPLIT_SEED).rand(len(df)) < 0.8\n", "train = df[msk]\n", "valid = df[~msk]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Language-model data bunch built from the train/validation frames.\n", "data_lm = TextLMDataBunch.from_df(path, train_df=train, valid_df=valid)" ] }, { "cell_type": "code", 
"execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Classifier data bunch over the same frames; it reuses the vocabulary of\n", "# the language-model data and is batched 32 items at a time.\n", "data_clas = TextClasDataBunch.from_df(\n", "    path,\n", "    train_df=train,\n", "    valid_df=valid,\n", "    vocab=data_lm.train_ds.vocab,\n", "    bs=32,\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Save both data bunches, language-model one first.\n", "for bunch, fname in ((data_lm, 'data_lm_export.pkl'), (data_clas, 'data_clas_export.pkl')):\n", "    bunch.save(fname)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 00:01

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
epochtrain_lossvalid_lossaccuracytime
08.5208587.2216180.11629500:01
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)\n", "learn.fit_one_cycle(1, 1e-2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'the ileocolonic '" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learn.predict(\"the ileocolonic\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "learn.save_encoder('ft_enc')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)\n", "learn.load_encoder('ft_enc')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
texttarget
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve1
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve1
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve1
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve1
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve1
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Display a sample of the classifier batch (tokenized text and its target).\n", "data_clas.show_batch()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 00:01

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
epochtrain_lossvalid_lossaccuracytime
00.2720390.2565140.91746600:01
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learn.fit_one_cycle(1, 1e-2)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-2)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Category 1, tensor(1), tensor([0.0508, 0.9492]))" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learn.predict(\"the ileocolonic anastomosis\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "txt_ci = TextClassificationInterpretation.from_learner(learn)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TextPredictionActualLossProbability
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve113.380.97
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve113.380.97
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve113.300.96
xxbos the descending colon103.300.04
xxbos the terminal ileum113.290.96
xxbos the splenic flexure103.290.04
xxbos the terminal ileum112.980.95
xxbos the terminal ileum112.980.95
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve112.980.95
xxbos the terminal ileum112.980.95
xxbos the caecum , identified by the ileocaecal valve112.980.95
xxbos the ileocaecal valve112.980.95
xxbos the terminal ileum112.980.95
xxbos the ileocaecal valve112.980.95
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve112.980.95
xxbos the terminal ileum112.980.95
xxbos the terminal ileum112.980.95
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve112.980.95
xxbos 10 cm into the ileum112.980.95
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve112.980.95
xxbos the terminal ileum112.980.95
xxbos ileum;sigmoid colon;given102.980.05
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve112.980.95
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve112.980.95
xxbos the terminal ileum112.980.95
xxbos the terminal ileum012.220.11
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve012.220.11
xxbos the rectum102.190.11
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve112.190.89
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve112.110.88
xxbos the caecum , identified by the ileocaecal valve112.110.88
xxbos the terminal ileum112.110.88
xxbos the terminal ileum112.110.88
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve112.110.88
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve112.110.88
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve112.110.88
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve111.780.83
xxbos the terminal ileum111.780.83
xxbos the terminal ileum111.640.81
xxbos the terminal ileum111.640.81
xxbos the terminal ileum011.480.23
xxbos the terminal ileum011.480.23
xxbos the ileocolonic anastomosis100.750.47
xxbos the terminal ileum010.550.42
xxbos the terminal ileum010.520.41
xxbos the terminal ileum010.460.37
xxbos the caecum , identified by appendiceal orifice and ileocaecal valve110.320.73
xxbos the terminal ileum110.270.76
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve010.260.23
xxbos the terminal ileum , with identification of the appendiceal orifice and xxup ic valve110.250.78
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# List the 50 validation texts with the highest loss.\n", "txt_ci.show_top_losses(50)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Category 1, tensor(1), tensor([3.1341e-04, 9.9969e-01]))" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NOTE(review): the stored execution count jumps from 19 to 22, so this\n", "# output came from an out-of-order session; confirm with Restart & Run All.\n", "learn.predict(\"the terminal ileum\")" ] } ], "metadata": { "kernelspec": { "display_name": "pytorch_fastai", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }