{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "NLPDayWorkshopBert1.ipynb", "provenance": [], "collapsed_sections": [ "E4anfUzlcLdp", "w26lWw5IcR1e", "jBAsvM8LcU07", "NzN1GkN3fqAd", "nsCUOqq3APO9" ], "machine_shape": "hm" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "3b707fa7cdbe427fa7b1b91e04bc4d93": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_1d91fd6eb9844f02bf772f0641fd3884", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f1144e00ac7348a28677e50602496d52", "IPY_MODEL_befde3ab120d4aedb7f9ed1f697aae9f" ] } }, "1d91fd6eb9844f02bf772f0641fd3884": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f1144e00ac7348a28677e50602496d52": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_060a1c45bbfe43aea84162b8d26a420d", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 459, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 459, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_a299219fadb1477ba0f68d22bbdf8386" } }, "befde3ab120d4aedb7f9ed1f697aae9f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_41e11aa09f2d4504bdba0ae77ef251cf", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 459/459 [00:00<00:00, 21.5kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0e25d56f4a3141699171b59ba0e3c47d" } }, "060a1c45bbfe43aea84162b8d26a420d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "a299219fadb1477ba0f68d22bbdf8386": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "41e11aa09f2d4504bdba0ae77ef251cf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "0e25d56f4a3141699171b59ba0e3c47d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e49a94889ef546d08e552ff54df29588": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_9c349123dd3a4d89b58983c0bc7cab2b", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_224ad3c6125143959142a35144965fd6", "IPY_MODEL_62d0ad787c724c1d986451291e5c7070" ] } }, "9c349123dd3a4d89b58983c0bc7cab2b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "224ad3c6125143959142a35144965fd6": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_4729cb6e07404a8692dbe6a1fe9eafe4", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 531146902, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 531146902, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_6ff02f76b6904f2395f2a5088e7e4adc" } }, "62d0ad787c724c1d986451291e5c7070": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_d0fce87f97b945a4ba7a03e420daffd4", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 531M/531M [00:07<00:00, 66.8MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_50a7c5a2d24e47faa9072fe21d0ae396" } }, "4729cb6e07404a8692dbe6a1fe9eafe4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "6ff02f76b6904f2395f2a5088e7e4adc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "d0fce87f97b945a4ba7a03e420daffd4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "50a7c5a2d24e47faa9072fe21d0ae396": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a44971002aea4b1d8522d8a8b80fea53": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_6f0c82be1c7a4ce4a07728e4dc977b6f", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_83e9d626e89441248dae96107c0cd4fd", "IPY_MODEL_78e2abbc309b4e969cc817a31df62816" ] } }, "6f0c82be1c7a4ce4a07728e4dc977b6f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "83e9d626e89441248dae96107c0cd4fd": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_b89fea15996a48199a619ad33c59f7eb", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 494801, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 494801, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2e5d93f934684f2f80e13e46bb656b5d" } }, "78e2abbc309b4e969cc817a31df62816": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_73432aa5e68d494fa462fd7cee2d3e52", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 495k/495k [00:01<00:00, 453kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7bc5cf59488e4c378e565f7a9fe39a24" } }, "b89fea15996a48199a619ad33c59f7eb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "2e5d93f934684f2f80e13e46bb656b5d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "73432aa5e68d494fa462fd7cee2d3e52": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "7bc5cf59488e4c378e565f7a9fe39a24": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "62f38bd0655c4245b07fe599410eb710": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_b11f8c86af064ab892594d8f995ce055", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_899847b165d7410eab46fbe2b429b8f9", "IPY_MODEL_0d8f91e9052842939a374af67b833d07" ] } }, "b11f8c86af064ab892594d8f995ce055": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "899847b165d7410eab46fbe2b429b8f9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_749936a7630d4e8a95cca5f067dfff7b", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 112, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 112, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4f5ede7c4ed744239e23ac1a5b4e5640" } }, "0d8f91e9052842939a374af67b833d07": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b23492319c044c738cdb3ae92d253b37", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 112/112 [00:00<00:00, 359B/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5787971cfb424a33a941f8b417c5ad43" } }, "749936a7630d4e8a95cca5f067dfff7b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "4f5ede7c4ed744239e23ac1a5b4e5640": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b23492319c044c738cdb3ae92d253b37": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "5787971cfb424a33a941f8b417c5ad43": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a8557510cebe431a9ba99962d4e513fe": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_18efcae012d44802ad3d39cee7a48101", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_be90e73566c146d79afb42899bfc5caf", "IPY_MODEL_8da22b40eb2147469604c8c03fc79a29" ] } }, "18efcae012d44802ad3d39cee7a48101": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "be90e73566c146d79afb42899bfc5caf": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_0c4e8b6f44934856a716bbc06a8bc628", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7f44a86d5d144f8e8350760b6071e0d4" } }, "8da22b40eb2147469604c8c03fc79a29": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_e0204efda0cb4af5b788d3f7568be9c2", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2.00/2.00 [00:00<00:00, 28.6B/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_55b7d60cf88d47e2bf88d11efd3cacc1" } }, "0c4e8b6f44934856a716bbc06a8bc628": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "7f44a86d5d144f8e8350760b6071e0d4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e0204efda0cb4af5b788d3f7568be9c2": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "55b7d60cf88d47e2bf88d11efd3cacc1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "50acdd7b70d9481b9be636b2322b2cfb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_78070d0410a44081a1da48250ee4f51c", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_fa3b5430520b49879f920b63d2000717", "IPY_MODEL_a648effd76094b45a830d12c600ebd97" ] } }, "78070d0410a44081a1da48250ee4f51c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "fa3b5430520b49879f920b63d2000717": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_e8539425181b448e9b24bcc29375408e", "_dom_classes": [], "description": "Downloading: ", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 927, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 927, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_3a2d533ca85a4b7ba2df2c0ffd07603f" } }, "a648effd76094b45a830d12c600ebd97": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_dda001949a5042cc9f5e8440d7f3f812", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 3.08k/? [00:00<00:00, 4.59kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0b600c9bf631436a9b22231f4aae18b3" } }, "e8539425181b448e9b24bcc29375408e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "3a2d533ca85a4b7ba2df2c0ffd07603f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "dda001949a5042cc9f5e8440d7f3f812": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "0b600c9bf631436a9b22231f4aae18b3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "040a1e96d09d40f7b9d74a6e1601d1bd": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_a99293552d4d4a328d7158e19b3c8a6f", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_93d82eae199e4f85bcc5ff296413d11a", "IPY_MODEL_170b35007ee74a81a1a2cf6eec47db3b" ] } }, "a99293552d4d4a328d7158e19b3c8a6f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "93d82eae199e4f85bcc5ff296413d11a": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_316e78f7695646ef901264d4e37f8e93", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "info", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_03253f9a96524c86bfa7703d04f9dd6a" } }, "170b35007ee74a81a1a2cf6eec47db3b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_7ba6458b655f4c82b3af66bf9f9dcfaf", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1/? [00:00<00:00, 30.96 tables/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_059efec274ba40f29470d1a2dd02a33f" } }, "316e78f7695646ef901264d4e37f8e93": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "03253f9a96524c86bfa7703d04f9dd6a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "7ba6458b655f4c82b3af66bf9f9dcfaf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "059efec274ba40f29470d1a2dd02a33f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "31c2adcad6154b05adda222d88f39e67": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_4bac2eaf402d40f9b8e81786278eddb0", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_ca5943a12af54c498a67505dd4bce7f1", "IPY_MODEL_cdafe06d43284e10961ba6f8d4c05eaf" ] } }, "4bac2eaf402d40f9b8e81786278eddb0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ca5943a12af54c498a67505dd4bce7f1": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_6a19bd3eae2a43e5a3b0816999cb57ab", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_55da41e1604f4e1a81a2c84a9ab98623" } }, "cdafe06d43284e10961ba6f8d4c05eaf": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_5c9011bd9207477cb22b58095ee5e2c4", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1/1 [02:37<00:00, 157.23s/ba]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_d39668516e3f4f8cb36b4af3f349bf2f" } }, "6a19bd3eae2a43e5a3b0816999cb57ab": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "55da41e1604f4e1a81a2c84a9ab98623": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "5c9011bd9207477cb22b58095ee5e2c4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "d39668516e3f4f8cb36b4af3f349bf2f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "7ead312be00c423fab6b00f24563c350": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_7487cc3bc4384e63b3877b43f29def80", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_8729216f7c3145cd8f17b9ef648ab998", "IPY_MODEL_c179bb72f9414503bfe6c0ca8812d75a" ] } }, "7487cc3bc4384e63b3877b43f29def80": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "8729216f7c3145cd8f17b9ef648ab998": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_ecb218a3c4644fb293b64f2f8e75996b", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e91005c51c12401ca1861403412ffa77" } }, "c179bb72f9414503bfe6c0ca8812d75a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_547bb74ff9d4442dbedf16f3d744fc63", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1/1 [02:36<00:00, 156.51s/ba]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f9bd5f9433de4b6fa6ac4f1c4a8ed193" } }, "ecb218a3c4644fb293b64f2f8e75996b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "e91005c51c12401ca1861403412ffa77": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "547bb74ff9d4442dbedf16f3d744fc63": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "f9bd5f9433de4b6fa6ac4f1c4a8ed193": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "7c8ac18e29d749949352e9f994c2d54c": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_2c103c3e73da47d8b3fbf43f3ac5b05a", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_7eeb1989fab84e1ca2a1c0f169400530", "IPY_MODEL_6db794d4f1ec4ff8992c715ca1776147" ] } }, "2c103c3e73da47d8b3fbf43f3ac5b05a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "7eeb1989fab84e1ca2a1c0f169400530": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_85b20f8f885f45eeadcbd02b3748c34c", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2a050a4e56074143a69af25360caa05c" } }, "6db794d4f1ec4ff8992c715ca1776147": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_8b5eb4fb4dc64dc69cdb2b14c4c9b4e2", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1/1 [00:00<00:00, 12.47ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_9f80d961ebb04d90916a0c320634fbfb" } }, "85b20f8f885f45eeadcbd02b3748c34c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "2a050a4e56074143a69af25360caa05c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "8b5eb4fb4dc64dc69cdb2b14c4c9b4e2": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "9f80d961ebb04d90916a0c320634fbfb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "l6-IBxzEWJsi" }, "source": [ "# Training BERT for Cyberbullying Detection - HF Trainer Baseline" ] }, { "cell_type": "markdown", "metadata": { "id": "7r9W-oWEWZE9" }, "source": [ "Our goal is to train a binary classification model that should detect cyberbullying in Polish Tweets. The dataset comes from a Polish NLP competition - PolEval 2019 (http://2019.poleval.pl/index.php/tasks/task6). It is also included in Polish NLP Benchmark KLEJ (https://klejbenchmark.com/)." ] }, { "cell_type": "markdown", "metadata": { "id": "E4anfUzlcLdp" }, "source": [ "## Setup" ] }, { "cell_type": "markdown", "metadata": { "id": "yZDsPC-gYU24" }, "source": [ "Let's start by installing two libraries from HuggingFace that will make our job easier: transformers and datasets. We will also import the relevant libraries. " ] }, { "cell_type": "code", "metadata": { "id": "zOP87Wguvu6d", "outputId": "c40b7d62-4269-4970-8164-cda4d6f29202", "colab": { "base_uri": "https://localhost:8080/", "height": 158 } }, "source": [ "!pip install transformers -qq\n", "!pip install datasets -qq" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "\u001b[K |████████████████████████████████| 1.3MB 3.4MB/s \n", "\u001b[K |████████████████████████████████| 890kB 19.2MB/s \n", "\u001b[K |████████████████████████████████| 1.1MB 29.8MB/s \n", "\u001b[K |████████████████████████████████| 2.9MB 43.7MB/s \n", "\u001b[?25h Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[K |████████████████████████████████| 153kB 3.3MB/s \n", "\u001b[K |████████████████████████████████| 17.7MB 204kB/s \n", "\u001b[K |████████████████████████████████| 245kB 64.2MB/s \n", "\u001b[?25h" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "__EIOpLVvdCx" }, "source": [ "import numpy as np\n", "import pandas as pd\n", "import torch\n", "from transformers import BertForSequenceClassification, BertTokenizerFast, Trainer, TrainingArguments\n", "from datasets import load_dataset, Dataset\n", "from sklearn.metrics import accuracy_score, precision_recall_fscore_support" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "5jQnJZAyY0xy" }, "source": [ "In this demo, we will use the Polish pretrained BERT model - Polbert (https://github.com/kldarek/polbert). It can be downloaded from the HuggingFace model hub, and we will use BertForSequenceClassification class to load it. We will also need the Polbert tokenizer. " ] }, { "cell_type": "code", "metadata": { "id": "6l01woyxvqbj", "outputId": "8886a145-bc7d-4853-b607-11f7913e3a57", "colab": { "base_uri": "https://localhost:8080/", "height": 374, "referenced_widgets": [ "3b707fa7cdbe427fa7b1b91e04bc4d93", "1d91fd6eb9844f02bf772f0641fd3884", "f1144e00ac7348a28677e50602496d52", "befde3ab120d4aedb7f9ed1f697aae9f", "060a1c45bbfe43aea84162b8d26a420d", "a299219fadb1477ba0f68d22bbdf8386", "41e11aa09f2d4504bdba0ae77ef251cf", "0e25d56f4a3141699171b59ba0e3c47d", "e49a94889ef546d08e552ff54df29588", "9c349123dd3a4d89b58983c0bc7cab2b", "224ad3c6125143959142a35144965fd6", "62d0ad787c724c1d986451291e5c7070", "4729cb6e07404a8692dbe6a1fe9eafe4", "6ff02f76b6904f2395f2a5088e7e4adc", "d0fce87f97b945a4ba7a03e420daffd4", "50a7c5a2d24e47faa9072fe21d0ae396", "a44971002aea4b1d8522d8a8b80fea53", "6f0c82be1c7a4ce4a07728e4dc977b6f", "83e9d626e89441248dae96107c0cd4fd", "78e2abbc309b4e969cc817a31df62816", "b89fea15996a48199a619ad33c59f7eb", "2e5d93f934684f2f80e13e46bb656b5d", "73432aa5e68d494fa462fd7cee2d3e52", "7bc5cf59488e4c378e565f7a9fe39a24", "62f38bd0655c4245b07fe599410eb710", "b11f8c86af064ab892594d8f995ce055", "899847b165d7410eab46fbe2b429b8f9", "0d8f91e9052842939a374af67b833d07", "749936a7630d4e8a95cca5f067dfff7b", "4f5ede7c4ed744239e23ac1a5b4e5640", "b23492319c044c738cdb3ae92d253b37", "5787971cfb424a33a941f8b417c5ad43", "a8557510cebe431a9ba99962d4e513fe", "18efcae012d44802ad3d39cee7a48101", "be90e73566c146d79afb42899bfc5caf", "8da22b40eb2147469604c8c03fc79a29", "0c4e8b6f44934856a716bbc06a8bc628", "7f44a86d5d144f8e8350760b6071e0d4", "e0204efda0cb4af5b788d3f7568be9c2", "55b7d60cf88d47e2bf88d11efd3cacc1" ] } }, "source": [ "model = BertForSequenceClassification.from_pretrained('dkleczek/bert-base-polish-uncased-v1')\n", "tokenizer = BertTokenizerFast.from_pretrained('dkleczek/bert-base-polish-uncased-v1')" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3b707fa7cdbe427fa7b1b91e04bc4d93", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=459.0, style=ProgressStyle(description_…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e49a94889ef546d08e552ff54df29588", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=531146902.0, style=ProgressStyle(descri…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "Some weights of the model checkpoint at dkleczek/bert-base-polish-uncased-v1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dkleczek/bert-base-polish-uncased-v1 and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ], "name": "stderr" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a44971002aea4b1d8522d8a8b80fea53", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=494801.0, style=ProgressStyle(descripti…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "62f38bd0655c4245b07fe599410eb710", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a8557510cebe431a9ba99962d4e513fe", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2.0, style=ProgressStyle(description_wi…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "Jea3L4QEaP6M" }, "source": [ "Now we will load the training data from the KLEJ benchmark website, clean it up, and convert to a csv format that can be used to create a dataset. " ] }, { "cell_type": "markdown", "metadata": { "id": "w26lWw5IcR1e" }, "source": [ "## Data" ] }, { "cell_type": "code", "metadata": { "id": "ePP7gSQty0q1", "outputId": "2c2bac3d-0abb-450a-b28d-7b0ef9aa1ca8", "colab": { "base_uri": "https://localhost:8080/", "height": 265 } }, "source": [ "!wget -q https://klejbenchmark.com/static/data/klej_cbd.zip\n", "!unzip -q klej_cbd.zip" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "--2020-10-26 08:29:07-- https://klejbenchmark.com/static/data/klej_cbd.zip\n", "Resolving klejbenchmark.com (klejbenchmark.com)... 35.234.99.58\n", "Connecting to klejbenchmark.com (klejbenchmark.com)|35.234.99.58|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 375476 (367K) [application/zip]\n", "Saving to: ‘klej_cbd.zip’\n", "\n", "klej_cbd.zip 100%[===================>] 366.68K 866KB/s in 0.4s \n", "\n", "2020-10-26 08:29:08 (866 KB/s) - ‘klej_cbd.zip’ saved [375476/375476]\n", "\n", "Archive: klej_cbd.zip\n", " inflating: test_features.tsv \n", " inflating: train.tsv \n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "JYWTj6s80BKu", "outputId": "f606bc8c-2372-4fce-d6b6-3ae44cae018b", "colab": { "base_uri": "https://localhost:8080/", "height": 34 } }, "source": [ "df = pd.read_csv('train.tsv', delimiter='\\t')\n", "df = df.dropna().reset_index(drop=True)\n", "df.columns = ['text', 'label']\n", "df.label = df.label.astype(int)\n", "df = df.sample(frac=1, random_state=42)\n", "df.to_csv('train.csv', index=False)\n", "len(df), len(df[df.label == 1])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(10041, 851)" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] }, { "cell_type": "markdown", "metadata": { "id": "ujOgMyzGbi8T" }, "source": [ "Our training set consists of 10 thousand tweets, but only 851 of those tweets are tagged as cyberbullying. We can take a look at a sample of data in the dataframe. " ] }, { "cell_type": "code", "metadata": { "id": "ur9mfDc11DL_", "outputId": "a59183ba-09c7-4641-9e81-0332b234745f", "colab": { "base_uri": "https://localhost:8080/", "height": 197 } }, "source": [ "df.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", " | text | \n", "label | \n", "
---|---|---|
5809 | \n", "LUDZIE Z BYDGOSZCZY: NAJLEPSZA RESTAURACJA? Rt... | \n", "0 | \n", "
5938 | \n", "@anonymized_account Stałam na zewnątrz, ale ma... | \n", "0 | \n", "
2260 | \n", "RT @anonymized_account Halicki: proszę nie mów... | \n", "0 | \n", "
8833 | \n", "@anonymized_account @anonymized_account Czyli ... | \n", "1 | \n", "
4513 | \n", "@anonymized_account Już nic nie będzie takie s... | \n", "0 | \n", "
Step | \n", "Training Loss | \n", "Validation Loss | \n", "Accuracy | \n", "F1 | \n", "Precision | \n", "Recall | \n", "
---|---|---|---|---|---|---|
20 | \n", "0.581389 | \n", "0.320318 | \n", "0.907869 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
40 | \n", "0.268629 | \n", "0.245489 | \n", "0.908367 | \n", "0.010753 | \n", "1.000000 | \n", "0.005405 | \n", "
60 | \n", "0.216368 | \n", "0.229870 | \n", "0.914841 | \n", "0.149254 | \n", "0.937500 | \n", "0.081081 | \n", "
80 | \n", "0.207710 | \n", "0.255416 | \n", "0.913845 | \n", "0.121827 | \n", "1.000000 | \n", "0.064865 | \n", "
100 | \n", "0.187619 | \n", "0.178497 | \n", "0.926793 | \n", "0.374468 | \n", "0.880000 | \n", "0.237838 | \n", "
120 | \n", "0.207315 | \n", "0.169160 | \n", "0.931275 | \n", "0.589286 | \n", "0.655629 | \n", "0.535135 | \n", "
140 | \n", "0.155866 | \n", "0.170657 | \n", "0.929283 | \n", "0.564417 | \n", "0.652482 | \n", "0.497297 | \n", "
160 | \n", "0.173842 | \n", "0.173169 | \n", "0.930279 | \n", "0.554140 | \n", "0.674419 | \n", "0.470270 | \n", "
180 | \n", "0.128828 | \n", "0.174443 | \n", "0.927291 | \n", "0.522876 | \n", "0.661157 | \n", "0.432432 | \n", "
200 | \n", "0.144621 | \n", "0.169735 | \n", "0.930777 | \n", "0.535117 | \n", "0.701754 | \n", "0.432432 | \n", "
220 | \n", "0.122884 | \n", "0.167735 | \n", "0.929781 | \n", "0.568807 | \n", "0.654930 | \n", "0.502703 | \n", "
240 | \n", "0.134178 | \n", "0.168437 | \n", "0.927291 | \n", "0.535032 | \n", "0.651163 | \n", "0.454054 | \n", "
260 | \n", "0.094126 | \n", "0.169739 | \n", "0.932769 | \n", "0.571429 | \n", "0.692308 | \n", "0.486486 | \n", "
280 | \n", "0.079876 | \n", "0.183835 | \n", "0.931275 | \n", "0.574074 | \n", "0.669065 | \n", "0.502703 | \n", "
300 | \n", "0.091462 | \n", "0.203578 | \n", "0.930279 | \n", "0.545455 | \n", "0.682927 | \n", "0.454054 | \n", "
320 | \n", "0.074335 | \n", "0.195306 | \n", "0.930777 | \n", "0.582583 | \n", "0.655405 | \n", "0.524324 | \n", "
340 | \n", "0.079284 | \n", "0.202131 | \n", "0.932769 | \n", "0.563107 | \n", "0.701613 | \n", "0.470270 | \n", "
360 | \n", "0.082395 | \n", "0.193977 | \n", "0.931275 | \n", "0.584337 | \n", "0.659864 | \n", "0.524324 | \n", "
"
],
"text/plain": [
"