{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "1_NLP_StoryTelling.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyPQ3W15ZgzmdHqSRyajXgKm",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "4eedf1ebd7724c47886dbaf8c432fa27": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_2ae174538489490abac7de3fe89b72f9",
              "IPY_MODEL_1a3fd53fe31a403f8af07fb4e559cd9a",
              "IPY_MODEL_dbf7cac41e0845088ae3d2de6e6685fd"
            ],
            "layout": "IPY_MODEL_811692c4bfb14ab9bdf0944a185bcdea"
          }
        },
        "2ae174538489490abac7de3fe89b72f9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4d638d54f81b4450978a4758ca3a4c3c",
            "placeholder": "​",
            "style": "IPY_MODEL_e001a703a4bb4cf6a6fbbc53a35ca1f6",
            "value": "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: "
          }
        },
        "1a3fd53fe31a403f8af07fb4e559cd9a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2e1edd0cd4184d6288ea1a48c5a3c54e",
            "max": 25998,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_ad283e7f367a4613a103cf73d6c95b11",
            "value": 25998
          }
        },
        "dbf7cac41e0845088ae3d2de6e6685fd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_077c55a8e44c447387f24a35c4560891",
            "placeholder": "​",
            "style": "IPY_MODEL_112082ac7aba40caa03ac0c1b5db5b04",
            "value": " 154k/? [00:00&lt;00:00, 3.40MB/s]"
          }
        },
        "811692c4bfb14ab9bdf0944a185bcdea": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4d638d54f81b4450978a4758ca3a4c3c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e001a703a4bb4cf6a6fbbc53a35ca1f6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2e1edd0cd4184d6288ea1a48c5a3c54e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ad283e7f367a4613a103cf73d6c95b11": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "077c55a8e44c447387f24a35c4560891": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "112082ac7aba40caa03ac0c1b5db5b04": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ae3f920b0f2643aeb9620cd9eb6110e9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_cd6dc76d3ac34df79ced860e7c9d31de",
              "IPY_MODEL_ae75f7bb1b7c494dbb2ef8f23786f496",
              "IPY_MODEL_b040c9e48a7c4c8080747e6eefbd8242"
            ],
            "layout": "IPY_MODEL_464ceb334022420ca73234932f45e434"
          }
        },
        "cd6dc76d3ac34df79ced860e7c9d31de": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e75a0779c3b744eba444f727e9b5b0b9",
            "placeholder": "​",
            "style": "IPY_MODEL_72bdf9e623854a288c1d7bbb3ad5bf15",
            "value": "Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.4.0/models/default.zip: 100%"
          }
        },
        "ae75f7bb1b7c494dbb2ef8f23786f496": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f6c3a13239ce449695bd7ba25a1eaa37",
            "max": 479293702,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_a0b2730e6a53429ea3269d85cf531541",
            "value": 479293702
          }
        },
        "b040c9e48a7c4c8080747e6eefbd8242": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ee46b4454f934db5b1eed5dcfd857188",
            "placeholder": "​",
            "style": "IPY_MODEL_da524a9f74024d8296896804ae156e00",
            "value": " 479M/479M [00:08&lt;00:00, 50.0MB/s]"
          }
        },
        "464ceb334022420ca73234932f45e434": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e75a0779c3b744eba444f727e9b5b0b9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "72bdf9e623854a288c1d7bbb3ad5bf15": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f6c3a13239ce449695bd7ba25a1eaa37": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a0b2730e6a53429ea3269d85cf531541": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "ee46b4454f934db5b1eed5dcfd857188": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "da524a9f74024d8296896804ae156e00": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f96daf5dac8648389d27189d0a00b6d3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_85f56de7a9954e26abba220c82075d45",
              "IPY_MODEL_c40d78fdad3f4a259bcc7bf7bfa8c43b",
              "IPY_MODEL_672f5266150e4ba9bfb7b6e3a4e86879"
            ],
            "layout": "IPY_MODEL_267e1b66cb424f19b8a1c8e20f98fa6e"
          }
        },
        "85f56de7a9954e26abba220c82075d45": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e8de14a6fb5741338d8cf77c956b3761",
            "placeholder": "​",
            "style": "IPY_MODEL_51d05cd434034cdea16a1a533aa15faf",
            "value": "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: "
          }
        },
        "c40d78fdad3f4a259bcc7bf7bfa8c43b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_35979960457f4233bcd32d136408a816",
            "max": 25998,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_785b0d3affed4e87be0c418815247b83",
            "value": 25998
          }
        },
        "672f5266150e4ba9bfb7b6e3a4e86879": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f6c91f37203141f881219d5b0f869ad7",
            "placeholder": "​",
            "style": "IPY_MODEL_6b56d9f3d5734b2aaac1089e3b492a83",
            "value": " 154k/? [00:00&lt;00:00, 2.64MB/s]"
          }
        },
        "267e1b66cb424f19b8a1c8e20f98fa6e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e8de14a6fb5741338d8cf77c956b3761": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "51d05cd434034cdea16a1a533aa15faf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "35979960457f4233bcd32d136408a816": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "785b0d3affed4e87be0c418815247b83": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f6c91f37203141f881219d5b0f869ad7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6b56d9f3d5734b2aaac1089e3b492a83": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/rajeshradhakrishnanmvk/kitchen2.0/blob/feature101-frontend/ml/1_NLP_StoryTelling.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "ShG7yaY20_jg",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "bba7084f-b552-4027-cb09-858e6c549bc1"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[K     |████████████████████████████████| 574 kB 5.1 MB/s \n",
            "\u001b[K     |████████████████████████████████| 4.7 MB 33.9 MB/s \n",
            "\u001b[K     |████████████████████████████████| 197 kB 68.0 MB/s \n",
            "\u001b[K     |████████████████████████████████| 6.6 MB 45.7 MB/s \n",
            "\u001b[K     |████████████████████████████████| 120 kB 56.5 MB/s \n",
            "\u001b[?25h  Building wheel for emoji (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
          ]
        }
      ],
      "source": [
        "!pip install -Uqq stanza"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import stanza\n",
        "import nltk\n",
        "from nltk.corpus import wordnet\n",
        "from nltk.corpus import verbnet\n",
        "\n",
        "stanza.download('en') # download English model\n",
        "nlp = stanza.Pipeline('en', processors='tokenize,pos,lemma,depparse,ner')\n",
        "\n",
        "nltk.download('omw-1.4')\n",
        "nltk.download('wordnet')\n",
        "nltk.download('verbnet')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 558,
          "referenced_widgets": [
            "4eedf1ebd7724c47886dbaf8c432fa27",
            "2ae174538489490abac7de3fe89b72f9",
            "1a3fd53fe31a403f8af07fb4e559cd9a",
            "dbf7cac41e0845088ae3d2de6e6685fd",
            "811692c4bfb14ab9bdf0944a185bcdea",
            "4d638d54f81b4450978a4758ca3a4c3c",
            "e001a703a4bb4cf6a6fbbc53a35ca1f6",
            "2e1edd0cd4184d6288ea1a48c5a3c54e",
            "ad283e7f367a4613a103cf73d6c95b11",
            "077c55a8e44c447387f24a35c4560891",
            "112082ac7aba40caa03ac0c1b5db5b04",
            "ae3f920b0f2643aeb9620cd9eb6110e9",
            "cd6dc76d3ac34df79ced860e7c9d31de",
            "ae75f7bb1b7c494dbb2ef8f23786f496",
            "b040c9e48a7c4c8080747e6eefbd8242",
            "464ceb334022420ca73234932f45e434",
            "e75a0779c3b744eba444f727e9b5b0b9",
            "72bdf9e623854a288c1d7bbb3ad5bf15",
            "f6c3a13239ce449695bd7ba25a1eaa37",
            "a0b2730e6a53429ea3269d85cf531541",
            "ee46b4454f934db5b1eed5dcfd857188",
            "da524a9f74024d8296896804ae156e00",
            "f96daf5dac8648389d27189d0a00b6d3",
            "85f56de7a9954e26abba220c82075d45",
            "c40d78fdad3f4a259bcc7bf7bfa8c43b",
            "672f5266150e4ba9bfb7b6e3a4e86879",
            "267e1b66cb424f19b8a1c8e20f98fa6e",
            "e8de14a6fb5741338d8cf77c956b3761",
            "51d05cd434034cdea16a1a533aa15faf",
            "35979960457f4233bcd32d136408a816",
            "785b0d3affed4e87be0c418815247b83",
            "f6c91f37203141f881219d5b0f869ad7",
            "6b56d9f3d5734b2aaac1089e3b492a83"
          ]
        },
        "id": "RMLlmBIc85_s",
        "outputId": "ee4afc3d-a88c-4a81-8af8-66d0d697925b"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json:   0%|   …"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "4eedf1ebd7724c47886dbaf8c432fa27"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:stanza:Downloading default packages for language: en (English)...\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.4.0/models/default.zip:   0%|          | 0…"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "ae3f920b0f2643aeb9620cd9eb6110e9"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:stanza:Finished downloading models and saved to /root/stanza_resources.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json:   0%|   …"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f96daf5dac8648389d27189d0a00b6d3"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:stanza:Loading these models for language: en (English):\n",
            "=========================\n",
            "| Processor | Package   |\n",
            "-------------------------\n",
            "| tokenize  | combined  |\n",
            "| pos       | combined  |\n",
            "| lemma     | combined  |\n",
            "| depparse  | combined  |\n",
            "| ner       | ontonotes |\n",
            "=========================\n",
            "\n",
            "INFO:stanza:Use device: cpu\n",
            "INFO:stanza:Loading: tokenize\n",
            "INFO:stanza:Loading: pos\n",
            "INFO:stanza:Loading: lemma\n",
            "INFO:stanza:Loading: depparse\n",
            "INFO:stanza:Loading: ner\n",
            "INFO:stanza:Done loading processors!\n",
            "[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n",
            "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
            "[nltk_data] Downloading package verbnet to /root/nltk_data...\n",
            "[nltk_data]   Unzipping corpora/verbnet.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 2
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def events(text):\n",
        "  '''\n",
        "  as per Page: 32\n",
        "  inputs plot text\n",
        "  returns (s, v, o, m)\n",
        "  where v is a verb, \n",
        "  s is the subject of the verb,\n",
        "  o is the object of the verb, \n",
        "  and m is the modifier—or “wildcard”, \n",
        "  which can be a propositional object, \n",
        "  indirect object, causal complement \n",
        "  (e.g., in “I was glad that he drove,” “drove” is the causal complement to “glad.”), \n",
        "  or any other dependency unclassifiable to Stanford’s dependency parser.\n",
        "  '''\n",
        "  doc = nlp(text)\n",
        "\n",
        "  v,s,o,m = 'UNK','UNK','UNK','UNK'\n",
        "  for sent in doc.sentences:\n",
        "    for word in sent.words:\n",
        "      if word.pos == \"VERB\":\n",
        "        # Generalized\n",
        "        # 3. Verbs were replaced by VerbNet [79] version 3.2.43 frames (e.g. “arrived”/“arriving” become “escape-51.1”)\n",
        "        # TODO: verbnet.classids('scurry')\n",
        "        gVerb = verbnet.classids(word.text) #.split('.')[0]\n",
        "        v =  gVerb[0].split('.')[0] if gVerb else word.text #verb\n",
        "      if word.deprel == \"nsubj\" and word.pos == \"PRON\":\n",
        "        s = word.text #subject of the verb\n",
        "      if o == 'UNK' and word.deprel == \"obl\" and word.pos == \"NOUN\":\n",
        "        # Generalized\n",
        "        if  word.pos == \"NOUN\":\n",
        "          # 1. Named entities were identified (cf. [77]), and “PERSON” names were replaced with the tag <PERSON>n, \n",
        "          #    where n indicates the nth character name in the sentence. Other named entities were labelled as their \n",
        "          #    named entity recognition (NER) category (e.g. LOCATION, ORGANIZATION, etc.)\n",
        "          # 2. nouns were replaced by the WordNet [78] Synset two levels up in the inherited hypernym hierarchy\n",
        "          # TODO: woi.hypernyms()[0]\n",
        "          # e.g. self-propelled vehicle.n.01 vs the original word “car” (car.n.01)), while avoiding labelling it too generally (e.g. entity.n.01\n",
        "          synset = wordnet.synsets(word.text)\n",
        "          if synset:\n",
        "            woi = synset[0] if synset[0] else word.text\n",
        "            o = woi.hypernyms()[0].name().split('.')[0]\n",
        "          else:\n",
        "            o = word.text\n",
        "        else:\n",
        "          o = word.text #object of the verb\n",
        "        #Character Name Numbering\n",
        "\n",
        "        #Adding Genre Information\n",
        "      if  word.deprel == \"det\":\n",
        "        m = sent.words[word.head-1].text #modifier\n",
        "  return s + \" \" + v + \" \" + o + \" \" +  m"
      ],
      "metadata": {
        "id": "z0vnU5-n1IPU"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!wget https://www.dropbox.com/s/24pa44w7u7wvtma/plots.zip"
      ],
      "metadata": {
        "id": "XZ3gIRv39WJ0"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!unzip plots.zip"
      ],
      "metadata": {
        "id": "1umsRu3M9W1j"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "\n",
        "def parseFile():\n",
        "    with open('plots','r') as f:\n",
        "        lines = [line.split(\"<EOS>\")[-1] for line in f if line.strip()]\n",
        "    return lines\n",
        "\n",
        "df = pd.DataFrame(parseFile(), columns=[\"plots\"])\n",
        "\n",
        "df.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "M5tjXU_k9IUt",
        "outputId": "6bdad539-fc4c-4dac-c93a-76ea29210a19"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                               plots\n",
              "0  Old Major, the old boar on the Manor Farm, sum...\n",
              "1  When Major dies, two young pigs, Snowball and ...\n",
              "2  The animals revolt and drive the drunken and i...\n",
              "3  They adopt the Seven Commandments of Animalism...\n",
              "4  Snowball teaches the animals to read and write..."
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-03384730-8db1-49d2-ab6b-03e9839ccded\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>plots</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Old Major, the old boar on the Manor Farm, sum...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>When Major dies, two young pigs, Snowball and ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>The animals revolt and drive the drunken and i...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>They adopt the Seven Commandments of Animalism...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Snowball teaches the animals to read and write...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-03384730-8db1-49d2-ab6b-03e9839ccded')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-03384730-8db1-49d2-ab6b-03e9839ccded button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-03384730-8db1-49d2-ab6b-03e9839ccded');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df.count()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-yNA-gpGGCHk",
        "outputId": "cb0df6ff-43ad-471d-c190-32b2c53a4c6d"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "plots    121649\n",
              "dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "train = df.loc[9:25, ['plots']]"
      ],
      "metadata": {
        "id": "E_VDLWZZ9Iok"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "vytT-pIXGK6L",
        "outputId": "3de1bf33-ee5e-4552-a6a4-1cb5e2435268"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                                plots\n",
              "9   Snowball, who has been studying the battles of...\n",
              "10  Snowball's popularity soars, and this event is...\n",
              "11  It is celebrated annually with the firing of a...\n",
              "12  Napoleon and Snowball struggle for pre-eminenc...\n",
              "13  When Snowball announces his plans to modernize..."
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-27905788-9d0e-42c9-a728-1b0ea8d46d25\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>plots</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>Snowball, who has been studying the battles of...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>Snowball's popularity soars, and this event is...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>It is celebrated annually with the firing of a...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>Napoleon and Snowball struggle for pre-eminenc...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>When Snowball announces his plans to modernize...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-27905788-9d0e-42c9-a728-1b0ea8d46d25')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-27905788-9d0e-42c9-a728-1b0ea8d46d25 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-27905788-9d0e-42c9-a728-1b0ea8d46d25');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "train['event'] = train.apply(lambda row: events(row.plots), axis=1)"
      ],
      "metadata": {
        "id": "mi-IBVZTGQH9"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "1c5clUA_Gp4L",
        "outputId": "ea429a37-1235-4cb4-b335-bb2e2e1ad984"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                                plots  \\\n",
              "9   Snowball, who has been studying the battles of...   \n",
              "10  Snowball's popularity soars, and this event is...   \n",
              "11  It is celebrated annually with the firing of a...   \n",
              "12  Napoleon and Snowball struggle for pre-eminenc...   \n",
              "13  When Snowball announces his plans to modernize...   \n",
              "\n",
              "                               event  \n",
              "9      who escape-51 expectation men  \n",
              "10        UNK proclaimed UNK Cowshed  \n",
              "11  UNK celebrated attack Revolution  \n",
              "12    UNK battle-36 pre-eminence UNK  \n",
              "13         UNK declares UNK windmill  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-5a898961-e422-44be-b073-e944a7e7d1dc\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>plots</th>\n",
              "      <th>event</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>Snowball, who has been studying the battles of...</td>\n",
              "      <td>who escape-51 expectation men</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>Snowball's popularity soars, and this event is...</td>\n",
              "      <td>UNK proclaimed UNK Cowshed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>It is celebrated annually with the firing of a...</td>\n",
              "      <td>UNK celebrated attack Revolution</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>Napoleon and Snowball struggle for pre-eminenc...</td>\n",
              "      <td>UNK battle-36 pre-eminence UNK</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>When Snowball announces his plans to modernize...</td>\n",
              "      <td>UNK declares UNK windmill</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5a898961-e422-44be-b073-e944a7e7d1dc')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-5a898961-e422-44be-b073-e944a7e7d1dc button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-5a898961-e422-44be-b073-e944a7e7d1dc');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Events to Events RNN\n",
        "\n",
        "[RNN example](https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/)"
      ],
      "metadata": {
        "id": "uSzPOA4T9rLt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from torch import nn\n",
        "\n",
        "import numpy as np"
      ],
      "metadata": {
        "id": "mAG2Qksi9sFU"
      },
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# text = ['hey how are you','good i am fine','have a nice day']\n",
        "text = train.event.tolist()\n",
        "# Join all the sentences together and extract the unique characters from the combined sentences\n",
        "chars = set(''.join(text))\n",
        "\n",
        "# Creating a dictionary that maps integers to the characters\n",
        "int2char = dict(enumerate(chars))\n",
        "\n",
        "# Creating another dictionary that maps characters to integers\n",
        "char2int = {char: ind for ind, char in int2char.items()}"
      ],
      "metadata": {
        "id": "a3FTKpaoA7Uc"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Finding the length of the longest string in our data\n",
        "maxlen = len(max(text, key=len))\n",
        "\n",
        "# Padding\n",
        "\n",
        "# A simple loop that loops through the list of sentences and adds a ' ' whitespace until the length of\n",
        "# the sentence matches the length of the longest sentence\n",
        "for i in range(len(text)):\n",
        "  while len(text[i])<maxlen:\n",
        "      text[i] += ' '"
      ],
      "metadata": {
        "id": "KgpNesuKBAqM"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Creating lists that will hold our input and target sequences\n",
        "input_seq = []\n",
        "target_seq = []\n",
        "\n",
        "for i in range(len(text)):\n",
        "    # Remove last character for input sequence\n",
        "  input_seq.append(text[i][:-1])\n",
        "    \n",
        "    # Remove first character for target sequence\n",
        "  target_seq.append(text[i][1:])\n",
        "  print(\"Input Sequence: {}\\nTarget Sequence: {}\".format(input_seq[i], target_seq[i]))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xHxIDm8KBD4r",
        "outputId": "8369ea14-9351-4c1d-c807-3bb6a07c26f3"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Input Sequence: who escape-51 expectation men             \n",
            "Target Sequence: ho escape-51 expectation men              \n",
            "Input Sequence: UNK proclaimed UNK Cowshed                \n",
            "Target Sequence: NK proclaimed UNK Cowshed                 \n",
            "Input Sequence: UNK celebrated attack Revolution          \n",
            "Target Sequence: NK celebrated attack Revolution           \n",
            "Input Sequence: UNK battle-36 pre-eminence UNK            \n",
            "Target Sequence: NK battle-36 pre-eminence UNK             \n",
            "Input Sequence: UNK declares UNK windmill                 \n",
            "Target Sequence: NK declares UNK windmill                  \n",
            "Input Sequence: who meander-47 UNK farm                   \n",
            "Target Sequence: ho meander-47 UNK farm                    \n",
            "Input Sequence: UNK claims swine idea                     \n",
            "Target Sequence: NK claims swine idea                      \n",
            "Input Sequence: UNK cooperate-73-3 commitment windmill    \n",
            "Target Sequence: NK cooperate-73-3 commitment windmill     \n",
            "Input Sequence: UNK sabotage atmospheric_phenomenon animal\n",
            "Target Sequence: NK sabotage atmospheric_phenomenon animals\n",
            "Input Sequence: he consorting canine farm                 \n",
            "Target Sequence: e consorting canine farm                  \n",
            "Input Sequence: who representing military_action battle   \n",
            "Target Sequence: ho representing military_action battle    \n",
            "Input Sequence: who adopting song man                     \n",
            "Target Sequence: ho adopting song man                      \n",
            "Input Sequence: they convinced UNK animals                \n",
            "Target Sequence: hey convinced UNK animals                 \n",
            "Input Sequence: UNK restored UNK windmill                 \n",
            "Target Sequence: NK restored UNK windmill                  \n",
            "Input Sequence: they wounded outgo workhorse              \n",
            "Target Sequence: hey wounded outgo workhorse               \n",
            "Input Sequence: he working ill_health windmill            \n",
            "Target Sequence: e working ill_health windmill             \n",
            "Input Sequence: UNK given artistic_movement surgeon       \n",
            "Target Sequence: NK given artistic_movement surgeon        \n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "for i in range(len(text)):\n",
        "    input_seq[i] = [char2int[character] for character in input_seq[i]]\n",
        "    target_seq[i] = [char2int[character] for character in target_seq[i]]"
      ],
      "metadata": {
        "id": "1pz-Lp25BFwV"
      },
      "execution_count": 18,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "dict_size = len(char2int)\n",
        "seq_len = maxlen - 1\n",
        "batch_size = len(text)\n",
        "\n",
        "def one_hot_encode(sequence, dict_size, seq_len, batch_size):\n",
        "    # Creating a multi-dimensional array of zeros with the desired output shape\n",
        "    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)\n",
        "    \n",
        "    # Replacing the 0 at the relevant character index with a 1 to represent that character\n",
        "    for i in range(batch_size):\n",
        "        for u in range(seq_len):\n",
        "            features[i, u, sequence[i][u]] = 1\n",
        "    return features"
      ],
      "metadata": {
        "id": "If4op2-EBIbl"
      },
      "execution_count": 19,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Input shape --> (Batch Size, Sequence Length, One-Hot Encoding Size)\n",
        "input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)"
      ],
      "metadata": {
        "id": "o9dyQgJ2BKfL"
      },
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "input_seq = torch.from_numpy(input_seq)\n",
        "target_seq = torch.Tensor(target_seq)"
      ],
      "metadata": {
        "id": "Vr2Kg6BuBMCM"
      },
      "execution_count": 21,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False\n",
        "is_cuda = torch.cuda.is_available()\n",
        "\n",
        "# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.\n",
        "if is_cuda:\n",
        "    device = torch.device(\"cuda\")\n",
        "    print(\"GPU is available\")\n",
        "else:\n",
        "    device = torch.device(\"cpu\")\n",
        "    print(\"GPU not available, CPU used\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nY5FK3idBOlr",
        "outputId": "3c71c214-7f30-4814-d81f-c4c03ac4df63"
      },
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "GPU not available, CPU used\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "class Model(nn.Module):\n",
        "    def __init__(self, input_size, output_size, hidden_dim, n_layers):\n",
        "        super(Model, self).__init__()\n",
        "\n",
        "        # Defining some parameters\n",
        "        self.hidden_dim = hidden_dim\n",
        "        self.n_layers = n_layers\n",
        "\n",
        "        #Defining the layers\n",
        "        # RNN Layer\n",
        "        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   \n",
        "        # Fully connected layer\n",
        "        self.fc = nn.Linear(hidden_dim, output_size)\n",
        "    \n",
        "    def forward(self, x):\n",
        "        \n",
        "        batch_size = x.size(0)\n",
        "\n",
        "        # Initializing hidden state for first input using method defined below\n",
        "        hidden = self.init_hidden(batch_size)\n",
        "\n",
        "        # Passing in the input and hidden state into the model and obtaining outputs\n",
        "        out, hidden = self.rnn(x, hidden)\n",
        "        \n",
        "        # Reshaping the outputs such that it can be fit into the fully connected layer\n",
        "        out = out.contiguous().view(-1, self.hidden_dim)\n",
        "        out = self.fc(out)\n",
        "        \n",
        "        return out, hidden\n",
        "    \n",
        "    def init_hidden(self, batch_size):\n",
        "        # This method generates the first hidden state of zeros which we'll use in the forward pass\n",
        "        # We'll send the tensor holding the hidden state to the device we specified earlier as well\n",
        "        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)\n",
        "        return hidden"
      ],
      "metadata": {
        "id": "oAdCASLFBTqs"
      },
      "execution_count": 23,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Instantiate the model with hyperparameters\n",
        "model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)\n",
        "# We'll also set the model to the device that we defined earlier (default is CPU)\n",
        "model.to(device)\n",
        "\n",
        "# Define hyperparameters\n",
        "n_epochs = 100\n",
        "lr=0.01\n",
        "\n",
        "# Define Loss, Optimizer\n",
        "criterion = nn.CrossEntropyLoss()\n",
        "optimizer = torch.optim.Adam(model.parameters(), lr=lr)"
      ],
      "metadata": {
        "id": "lgrM8uD2BWBF"
      },
      "execution_count": 24,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Training Run\n",
        "for epoch in range(1, n_epochs + 1):\n",
        "    optimizer.zero_grad() # Clears existing gradients from previous epoch\n",
        "    input_seq.to(device)\n",
        "    output, hidden = model(input_seq)\n",
        "    loss = criterion(output, target_seq.view(-1).long())\n",
        "    loss.backward() # Does backpropagation and calculates gradients\n",
        "    optimizer.step() # Updates the weights accordingly\n",
        "    \n",
        "    if epoch%10 == 0:\n",
        "        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')\n",
        "        print(\"Loss: {:.4f}\".format(loss.item()))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "egGrg3eiBYXD",
        "outputId": "e7cfd305-1951-44e7-94cf-d92dc3f17fb3"
      },
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch: 10/100............. Loss: 2.6087\n",
            "Epoch: 20/100............. Loss: 2.5179\n",
            "Epoch: 30/100............. Loss: 2.4569\n",
            "Epoch: 40/100............. Loss: 2.3277\n",
            "Epoch: 50/100............. Loss: 2.1268\n",
            "Epoch: 60/100............. Loss: 1.9311\n",
            "Epoch: 70/100............. Loss: 1.7815\n",
            "Epoch: 80/100............. Loss: 1.6578\n",
            "Epoch: 90/100............. Loss: 1.5524\n",
            "Epoch: 100/100............. Loss: 1.4615\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# This function takes in the model and character as arguments and returns the next character prediction and hidden state\n",
        "def predict(model, character):\n",
        "    # One-hot encoding our input to fit into the model\n",
        "    character = np.array([[char2int[c] for c in character]])\n",
        "    character = one_hot_encode(character, dict_size, character.shape[1], 1)\n",
        "    character = torch.from_numpy(character)\n",
        "    character.to(device)\n",
        "    \n",
        "    out, hidden = model(character)\n",
        "\n",
        "    prob = nn.functional.softmax(out[-1], dim=0).data\n",
        "    # Taking the class with the highest probability score from the output\n",
        "    char_ind = torch.max(prob, dim=0)[1].item()\n",
        "\n",
        "    return int2char[char_ind], hidden"
      ],
      "metadata": {
        "id": "oxO1sVQqBbC1"
      },
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# This function takes the desired output length and input characters as arguments, returning the produced sentence\n",
        "def sample(model, out_len, start='hey'):\n",
        "    model.eval() # eval mode\n",
        "    start = start.lower()\n",
        "    # First off, run through the starting characters\n",
        "    chars = [ch for ch in start]\n",
        "    size = out_len - len(chars)\n",
        "    # Now pass in the previous characters and get a new one\n",
        "    for ii in range(size):\n",
        "        char, h = predict(model, chars)\n",
        "        chars.append(char)\n",
        "\n",
        "    return ''.join(chars)"
      ],
      "metadata": {
        "id": "nTAsEek3BdmF"
      },
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "sample(model, 15, 'claims')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "OVgbUptiHFus",
        "outputId": "8cd7dae5-b8fd-4b45-8115-59fe126f471d"
      },
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'claimse        '"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 34
        }
      ]
    }
  ]
}