{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "1_NLP_StoryTelling.ipynb", "provenance": [], "collapsed_sections": [], "authorship_tag": "ABX9TyPQ3W15ZgzmdHqSRyajXgKm", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "4eedf1ebd7724c47886dbaf8c432fa27": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2ae174538489490abac7de3fe89b72f9", "IPY_MODEL_1a3fd53fe31a403f8af07fb4e559cd9a", "IPY_MODEL_dbf7cac41e0845088ae3d2de6e6685fd" ], "layout": "IPY_MODEL_811692c4bfb14ab9bdf0944a185bcdea" } }, "2ae174538489490abac7de3fe89b72f9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4d638d54f81b4450978a4758ca3a4c3c", "placeholder": "​", "style": "IPY_MODEL_e001a703a4bb4cf6a6fbbc53a35ca1f6", "value": "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: " } }, "1a3fd53fe31a403f8af07fb4e559cd9a": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", 
"_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2e1edd0cd4184d6288ea1a48c5a3c54e", "max": 25998, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ad283e7f367a4613a103cf73d6c95b11", "value": 25998 } }, "dbf7cac41e0845088ae3d2de6e6685fd": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_077c55a8e44c447387f24a35c4560891", "placeholder": "​", "style": "IPY_MODEL_112082ac7aba40caa03ac0c1b5db5b04", "value": " 154k/? 
[00:00<00:00, 3.40MB/s]" } }, "811692c4bfb14ab9bdf0944a185bcdea": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4d638d54f81b4450978a4758ca3a4c3c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, 
"left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e001a703a4bb4cf6a6fbbc53a35ca1f6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2e1edd0cd4184d6288ea1a48c5a3c54e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ad283e7f367a4613a103cf73d6c95b11": { "model_module": "@jupyter-widgets/controls", "model_name": 
"ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "077c55a8e44c447387f24a35c4560891": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "112082ac7aba40caa03ac0c1b5db5b04": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ae3f920b0f2643aeb9620cd9eb6110e9": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_cd6dc76d3ac34df79ced860e7c9d31de", "IPY_MODEL_ae75f7bb1b7c494dbb2ef8f23786f496", "IPY_MODEL_b040c9e48a7c4c8080747e6eefbd8242" ], "layout": "IPY_MODEL_464ceb334022420ca73234932f45e434" } }, "cd6dc76d3ac34df79ced860e7c9d31de": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e75a0779c3b744eba444f727e9b5b0b9", "placeholder": "​", "style": "IPY_MODEL_72bdf9e623854a288c1d7bbb3ad5bf15", "value": "Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.4.0/models/default.zip: 100%" } }, "ae75f7bb1b7c494dbb2ef8f23786f496": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f6c3a13239ce449695bd7ba25a1eaa37", "max": 479293702, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a0b2730e6a53429ea3269d85cf531541", "value": 479293702 } }, 
"b040c9e48a7c4c8080747e6eefbd8242": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ee46b4454f934db5b1eed5dcfd857188", "placeholder": "​", "style": "IPY_MODEL_da524a9f74024d8296896804ae156e00", "value": " 479M/479M [00:08<00:00, 50.0MB/s]" } }, "464ceb334022420ca73234932f45e434": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e75a0779c3b744eba444f727e9b5b0b9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": 
"1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "72bdf9e623854a288c1d7bbb3ad5bf15": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f6c3a13239ce449695bd7ba25a1eaa37": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a0b2730e6a53429ea3269d85cf531541": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ee46b4454f934db5b1eed5dcfd857188": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": 
null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "da524a9f74024d8296896804ae156e00": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f96daf5dac8648389d27189d0a00b6d3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_85f56de7a9954e26abba220c82075d45", "IPY_MODEL_c40d78fdad3f4a259bcc7bf7bfa8c43b", "IPY_MODEL_672f5266150e4ba9bfb7b6e3a4e86879" ], "layout": "IPY_MODEL_267e1b66cb424f19b8a1c8e20f98fa6e" } }, "85f56de7a9954e26abba220c82075d45": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e8de14a6fb5741338d8cf77c956b3761", "placeholder": "​", "style": "IPY_MODEL_51d05cd434034cdea16a1a533aa15faf", "value": "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: " } }, "c40d78fdad3f4a259bcc7bf7bfa8c43b": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", 
"model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_35979960457f4233bcd32d136408a816", "max": 25998, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_785b0d3affed4e87be0c418815247b83", "value": 25998 } }, "672f5266150e4ba9bfb7b6e3a4e86879": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f6c91f37203141f881219d5b0f869ad7", "placeholder": "​", "style": "IPY_MODEL_6b56d9f3d5734b2aaac1089e3b492a83", "value": " 154k/? 
[00:00<00:00, 2.64MB/s]" } }, "267e1b66cb424f19b8a1c8e20f98fa6e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e8de14a6fb5741338d8cf77c956b3761": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, 
"left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "51d05cd434034cdea16a1a533aa15faf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "35979960457f4233bcd32d136408a816": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "785b0d3affed4e87be0c418815247b83": { "model_module": "@jupyter-widgets/controls", "model_name": 
"ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f6c91f37203141f881219d5b0f869ad7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b56d9f3d5734b2aaac1089e3b492a83": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": 
"view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "ShG7yaY20_jg", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "bba7084f-b552-4027-cb09-858e6c549bc1" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[K |████████████████████████████████| 574 kB 5.1 MB/s \n", "\u001b[K |████████████████████████████████| 4.7 MB 33.9 MB/s \n", "\u001b[K |████████████████████████████████| 197 kB 68.0 MB/s \n", "\u001b[K |████████████████████████████████| 6.6 MB 45.7 MB/s \n", "\u001b[K |████████████████████████████████| 120 kB 56.5 MB/s \n", "\u001b[?25h Building wheel for emoji (setup.py) ... \u001b[?25l\u001b[?25hdone\n" ] } ], "source": [ "!pip install -Uqq stanza" ] }, { "cell_type": "code", "source": [ "import stanza\n", "import nltk\n", "from nltk.corpus import wordnet\n", "from nltk.corpus import verbnet\n", "\n", "stanza.download('en') # download English model\n", "nlp = stanza.Pipeline('en', processors='tokenize,pos,lemma,depparse,ner')\n", "\n", "nltk.download('omw-1.4')\n", "nltk.download('wordnet')\n", "nltk.download('verbnet')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 558, "referenced_widgets": [ "4eedf1ebd7724c47886dbaf8c432fa27", "2ae174538489490abac7de3fe89b72f9", "1a3fd53fe31a403f8af07fb4e559cd9a", "dbf7cac41e0845088ae3d2de6e6685fd", "811692c4bfb14ab9bdf0944a185bcdea", "4d638d54f81b4450978a4758ca3a4c3c", "e001a703a4bb4cf6a6fbbc53a35ca1f6", "2e1edd0cd4184d6288ea1a48c5a3c54e", "ad283e7f367a4613a103cf73d6c95b11", "077c55a8e44c447387f24a35c4560891", "112082ac7aba40caa03ac0c1b5db5b04", "ae3f920b0f2643aeb9620cd9eb6110e9", "cd6dc76d3ac34df79ced860e7c9d31de", "ae75f7bb1b7c494dbb2ef8f23786f496", "b040c9e48a7c4c8080747e6eefbd8242", "464ceb334022420ca73234932f45e434", "e75a0779c3b744eba444f727e9b5b0b9", "72bdf9e623854a288c1d7bbb3ad5bf15", "f6c3a13239ce449695bd7ba25a1eaa37", "a0b2730e6a53429ea3269d85cf531541", 
"ee46b4454f934db5b1eed5dcfd857188", "da524a9f74024d8296896804ae156e00", "f96daf5dac8648389d27189d0a00b6d3", "85f56de7a9954e26abba220c82075d45", "c40d78fdad3f4a259bcc7bf7bfa8c43b", "672f5266150e4ba9bfb7b6e3a4e86879", "267e1b66cb424f19b8a1c8e20f98fa6e", "e8de14a6fb5741338d8cf77c956b3761", "51d05cd434034cdea16a1a533aa15faf", "35979960457f4233bcd32d136408a816", "785b0d3affed4e87be0c418815247b83", "f6c91f37203141f881219d5b0f869ad7", "6b56d9f3d5734b2aaac1089e3b492a83" ] }, "id": "RMLlmBIc85_s", "outputId": "ee4afc3d-a88c-4a81-8af8-66d0d697925b" }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: 0%| …" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "4eedf1ebd7724c47886dbaf8c432fa27" } }, "metadata": {} }, { "output_type": "stream", "name": "stderr", "text": [ "INFO:stanza:Downloading default packages for language: en (English)...\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.4.0/models/default.zip: 0%| | 0…" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "ae3f920b0f2643aeb9620cd9eb6110e9" } }, "metadata": {} }, { "output_type": "stream", "name": "stderr", "text": [ "INFO:stanza:Finished downloading models and saved to /root/stanza_resources.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json: 0%| …" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "f96daf5dac8648389d27189d0a00b6d3" } }, "metadata": {} }, { "output_type": "stream", "name": "stderr", "text": [ "INFO:stanza:Loading these models for language: en (English):\n", "=========================\n", "| 
Processor | Package |\n", "-------------------------\n", "| tokenize | combined |\n", "| pos | combined |\n", "| lemma | combined |\n", "| depparse | combined |\n", "| ner | ontonotes |\n", "=========================\n", "\n", "INFO:stanza:Use device: cpu\n", "INFO:stanza:Loading: tokenize\n", "INFO:stanza:Loading: pos\n", "INFO:stanza:Loading: lemma\n", "INFO:stanza:Loading: depparse\n", "INFO:stanza:Loading: ner\n", "INFO:stanza:Done loading processors!\n", "[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n", "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", "[nltk_data] Downloading package verbnet to /root/nltk_data...\n", "[nltk_data] Unzipping corpora/verbnet.zip.\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, "metadata": {}, "execution_count": 2 } ] }, { "cell_type": "code", "source": [ "def events(text):\n", " '''\n", " as per Page: 32\n", " inputs plot text\n", " returns (s, v, o, m)\n", " where v is a verb, \n", " s is the subject of the verb,\n", " o is the object of the verb, \n", " and m is the modifier—or “wildcard”, \n", " which can be a propositional object, \n", " indirect object, causal complement \n", " (e.g., in “I was glad that he drove,” “drove” is the causal complement to “glad.”), \n", " or any other dependency unclassifiable to Stanford’s dependency parser.\n", " '''\n", " doc = nlp(text)\n", "\n", " v,s,o,m = 'UNK','UNK','UNK','UNK'\n", " for sent in doc.sentences:\n", " for word in sent.words:\n", " if word.pos == \"VERB\":\n", " # Generalized\n", " # 3. Verbs were replaced by VerbNet [79] version 3.2.43 frames (e.g. 
“arrived”/“arriving” become “escape-51.1”)\n", " # TODO: verbnet.classids('scurry')\n", " gVerb = verbnet.classids(word.text) #.split('.')[0]\n", " v = gVerb[0].split('.')[0] if gVerb else word.text #verb\n", " if word.deprel == \"nsubj\" and word.pos == \"PRON\":\n", " s = word.text #subject of the verb\n", " if o == 'UNK' and word.deprel == \"obl\" and word.pos == \"NOUN\":\n", " # Generalized\n", " if word.pos == \"NOUN\":\n", " # 1. Named entities were identified (cf. [77]), and “PERSON” names were replaced with the tag n, \n", " # where n indicates the nth character name in the sentence. Other named entities were labelled as their \n", " # named entity recognition (NER) category (e.g. LOCATION, ORGANIZATION, etc.)\n", " # 2. nouns were replaced by the WordNet [78] Synset two levels up in the inherited hypernym hierarchy\n", " # TODO: woi.hypernyms()[0]\n", " # e.g. self-propelled vehicle.n.01 vs the original word “car” (car.n.01)), while avoiding labelling it too generally (e.g. entity.n.01\n", " synset = wordnet.synsets(word.text)\n", " if synset:\n", " woi = synset[0] if synset[0] else word.text\n", " o = woi.hypernyms()[0].name().split('.')[0]\n", " else:\n", " o = word.text\n", " else:\n", " o = word.text #object of the verb\n", " #Character Name Numbering\n", "\n", " #Adding Genre Information\n", " if word.deprel == \"det\":\n", " m = sent.words[word.head-1].text #modifier\n", " return s + \" \" + v + \" \" + o + \" \" + m" ], "metadata": { "id": "z0vnU5-n1IPU" }, "execution_count": 11, "outputs": [] }, { "cell_type": "code", "source": [ "!wget https://www.dropbox.com/s/24pa44w7u7wvtma/plots.zip" ], "metadata": { "id": "XZ3gIRv39WJ0" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!unzip plots.zip" ], "metadata": { "id": "1umsRu3M9W1j" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "def parseFile():\n", " with open('plots','r') as f:\n", " lines = 
[line.split(\"\")[-1] for line in f if line.strip()]\n", " return lines\n", "\n", "df = pd.DataFrame(parseFile(), columns=[\"plots\"])\n", "\n", "df.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "M5tjXU_k9IUt", "outputId": "6bdad539-fc4c-4dac-c93a-76ea29210a19" }, "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " plots\n", "0 Old Major, the old boar on the Manor Farm, sum...\n", "1 When Major dies, two young pigs, Snowball and ...\n", "2 The animals revolt and drive the drunken and i...\n", "3 They adopt the Seven Commandments of Animalism...\n", "4 Snowball teaches the animals to read and write..." ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
plots
0Old Major, the old boar on the Manor Farm, sum...
1When Major dies, two young pigs, Snowball and ...
2The animals revolt and drive the drunken and i...
3They adopt the Seven Commandments of Animalism...
4Snowball teaches the animals to read and write...
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "source": [ "df.count()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-yNA-gpGGCHk", "outputId": "cb0df6ff-43ad-471d-c190-32b2c53a4c6d" }, "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "plots 121649\n", "dtype: int64" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "train = df.loc[9:25, ['plots']]" ], "metadata": { "id": "E_VDLWZZ9Iok" }, "execution_count": 5, "outputs": [] }, { "cell_type": "code", "source": [ "train.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "vytT-pIXGK6L", "outputId": "3de1bf33-ee5e-4552-a6a4-1cb5e2435268" }, "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " plots\n", "9 Snowball, who has been studying the battles of...\n", "10 Snowball's popularity soars, and this event is...\n", "11 It is celebrated annually with the firing of a...\n", "12 Napoleon and Snowball struggle for pre-eminenc...\n", "13 When Snowball announces his plans to modernize..." ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
plots
9Snowball, who has been studying the battles of...
10Snowball's popularity soars, and this event is...
11It is celebrated annually with the firing of a...
12Napoleon and Snowball struggle for pre-eminenc...
13When Snowball announces his plans to modernize...
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "train['event'] = train.apply(lambda row: events(row.plots), axis=1)" ], "metadata": { "id": "mi-IBVZTGQH9" }, "execution_count": 12, "outputs": [] }, { "cell_type": "code", "source": [ "train.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "1c5clUA_Gp4L", "outputId": "ea429a37-1235-4cb4-b335-bb2e2e1ad984" }, "execution_count": 13, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " plots \\\n", "9 Snowball, who has been studying the battles of... \n", "10 Snowball's popularity soars, and this event is... \n", "11 It is celebrated annually with the firing of a... \n", "12 Napoleon and Snowball struggle for pre-eminenc... \n", "13 When Snowball announces his plans to modernize... \n", "\n", " event \n", "9 who escape-51 expectation men \n", "10 UNK proclaimed UNK Cowshed \n", "11 UNK celebrated attack Revolution \n", "12 UNK battle-36 pre-eminence UNK \n", "13 UNK declares UNK windmill " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
plotsevent
9Snowball, who has been studying the battles of...who escape-51 expectation men
10Snowball's popularity soars, and this event is...UNK proclaimed UNK Cowshed
11It is celebrated annually with the firing of a...UNK celebrated attack Revolution
12Napoleon and Snowball struggle for pre-eminenc...UNK battle-36 pre-eminence UNK
13When Snowball announces his plans to modernize...UNK declares UNK windmill
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 13 } ] }, { "cell_type": "markdown", "source": [ "## Events to Events RNN\n", "\n", "[RNN example](https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/)" ], "metadata": { "id": "uSzPOA4T9rLt" } }, { "cell_type": "code", "source": [ "import torch\n", "from torch import nn\n", "\n", "import numpy as np" ], "metadata": { "id": "mAG2Qksi9sFU" }, "execution_count": 14, "outputs": [] }, { "cell_type": "code", "source": [ "# text = ['hey how are you','good i am fine','have a nice day']\n", "text = train.event.tolist()\n", "# Join all the sentences together and extract the unique characters from the combined sentences\n", "chars = set(''.join(text))\n", "\n", "# Creating a dictionary that maps integers to the characters\n", "int2char = dict(enumerate(chars))\n", "\n", "# Creating another dictionary that maps characters to integers\n", "char2int = {char: ind for ind, char in int2char.items()}" ], "metadata": { "id": "a3FTKpaoA7Uc" }, "execution_count": 15, "outputs": [] }, { "cell_type": "code", "source": [ "# Finding the length of the longest string in our data\n", "maxlen = len(max(text, key=len))\n", "\n", "# Padding\n", "\n", "# A simple loop that loops through the list of sentences and adds a ' ' whitespace until the length of\n", "# the sentence matches the length of the longest sentence\n", "for i in range(len(text)):\n", " while len(text[i]) (Batch Size, Sequence Length, One-Hot Encoding Size)\n", "input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)" ], "metadata": { "id": "o9dyQgJ2BKfL" }, "execution_count": 20, "outputs": [] }, { "cell_type": "code", "source": [ "input_seq = torch.from_numpy(input_seq)\n", "target_seq = torch.Tensor(target_seq)" ], "metadata": { "id": "Vr2Kg6BuBMCM" }, "execution_count": 21, "outputs": [] }, { "cell_type": "code", "source": [ "# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return 
class Model(nn.Module):
    """Vanilla RNN followed by a linear layer that scores every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step of the input.
    output_size : int
        Number of scores produced per time step.
    hidden_dim : int
        Size of the RNN hidden state.
    n_layers : int
        Number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        # Kept on the instance because forward()/init_hidden() need them for reshaping.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        # Maps each hidden vector to `output_size` scores.
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the RNN over `x` from a zero hidden state.

        Parameters
        ----------
        x : torch.Tensor
            Batch-first input of shape (batch, seq, input_size).

        Returns
        -------
        (out, hidden)
            `out` has shape (batch * seq, output_size): every time step of every
            sequence flattened into rows. `hidden` is the final hidden state
            returned by the RNN.
        """
        batch_size = x.size(0)

        # A fresh zero state for every call; no state is carried between calls.
        hidden = self.init_hidden(batch_size)

        out, hidden = self.rnn(x, hidden)

        # Flatten (batch, seq, hidden) -> (batch * seq, hidden) for the linear layer.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape (n_layers, batch_size, hidden_dim).

        The tensor is allocated with the same device and dtype as the model's
        parameters, so it stays valid after `model.to(device)` or
        `model.double()`. Allocating it with a bare `torch.zeros` pinned it to
        float32 on the CPU and made `forward` fail on any other device or dtype.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.n_layers, batch_size, self.hidden_dim)
"oAdCASLFBTqs" }, "execution_count": 23, "outputs": [] }, { "cell_type": "code", "source": [ "# Instantiate the model with hyperparameters\n", "model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)\n", "# We'll also set the model to the device that we defined earlier (default is CPU)\n", "model.to(device)\n", "\n", "# Define hyperparameters\n", "n_epochs = 100\n", "lr=0.01\n", "\n", "# Define Loss, Optimizer\n", "criterion = nn.CrossEntropyLoss()\n", "optimizer = torch.optim.Adam(model.parameters(), lr=lr)" ], "metadata": { "id": "lgrM8uD2BWBF" }, "execution_count": 24, "outputs": [] }, { "cell_type": "code", "source": [ "# Training Run\n", "for epoch in range(1, n_epochs + 1):\n", " optimizer.zero_grad() # Clears existing gradients from previous epoch\n", " input_seq.to(device)\n", " output, hidden = model(input_seq)\n", " loss = criterion(output, target_seq.view(-1).long())\n", " loss.backward() # Does backpropagation and calculates gradients\n", " optimizer.step() # Updates the weights accordingly\n", " \n", " if epoch%10 == 0:\n", " print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')\n", " print(\"Loss: {:.4f}\".format(loss.item()))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "egGrg3eiBYXD", "outputId": "e7cfd305-1951-44e7-94cf-d92dc3f17fb3" }, "execution_count": 25, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Epoch: 10/100............. Loss: 2.6087\n", "Epoch: 20/100............. Loss: 2.5179\n", "Epoch: 30/100............. Loss: 2.4569\n", "Epoch: 40/100............. Loss: 2.3277\n", "Epoch: 50/100............. Loss: 2.1268\n", "Epoch: 60/100............. Loss: 1.9311\n", "Epoch: 70/100............. Loss: 1.7815\n", "Epoch: 80/100............. Loss: 1.6578\n", "Epoch: 90/100............. Loss: 1.5524\n", "Epoch: 100/100............. 
def predict(model, character):
    """Predict the character that follows a sequence of characters.

    Parameters
    ----------
    model : nn.Module
        Model whose forward pass returns `(out, hidden)`, where `out` holds one
        row of scores per time step.
    character : sequence of str
        The characters seen so far. Every character must be a key of `char2int`.

    Returns
    -------
    (str, hidden)
        The most probable next character and the hidden state returned by the model.

    Raises
    ------
    KeyError
        If a character is not in the vocabulary.
    """
    unknown = sorted({c for c in character if c not in char2int})
    if unknown:
        # Still a KeyError, as before, but now it names the offending characters.
        raise KeyError("characters not in the vocabulary: {}".format(unknown))

    # One-hot encoding our input to fit into the model
    encoded = np.array([[char2int[c] for c in character]])
    encoded = one_hot_encode(encoded, dict_size, encoded.shape[1], 1)
    # Tensor.to() is not in-place: keep the returned tensor, otherwise the input
    # stays on the CPU while the model may live on another device.
    encoded = torch.from_numpy(encoded).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out, hidden = model(encoded)

    # The last row holds the scores for the character following the sequence.
    prob = nn.functional.softmax(out[-1], dim=0)
    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden


def sample(model, out_len, start='hey'):
    """Greedily extend `start` one character at a time up to `out_len` characters.

    Parameters
    ----------
    model : nn.Module
        Model passed through to `predict`; it is switched to eval mode.
    out_len : int
        Desired total length of the returned string. If it does not exceed
        `len(start)`, the lower-cased `start` is returned unchanged.
    start : str
        Seed text. It is lower-cased before use and must not be empty.

    Returns
    -------
    str
        The lower-cased seed followed by the predicted characters.

    Raises
    ------
    ValueError
        If `start` is empty (there is nothing to condition the first prediction on).
    KeyError
        If the lower-cased seed contains a character outside the vocabulary.
    """
    if not start:
        raise ValueError("start must contain at least one character")

    model.eval()  # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = list(start)
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for _ in range(size):
        char, _hidden = predict(model, chars)
        chars.append(char)

    return ''.join(chars)