{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "fast_inference_transformers_on_CPU.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "485333784c724dcfb2b5a629d5b5df1d": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_daf73dbf3e4d48d98d48395d8fc1282e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_03109d842c74499382bc8c3c8234cb3d", "IPY_MODEL_4832e8ea4d234bc288fa768a64da7b07", "IPY_MODEL_a82e6a3e16514901a11b2ad390dc8aec" ] } }, "daf73dbf3e4d48d98d48395d8fc1282e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": 
null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "03109d842c74499382bc8c3c8234cb3d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_a9832e3f455446c69d4a5ab5e370de24", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_3a2fe1d7aec647b7bc59114045435a81" } }, "4832e8ea4d234bc288fa768a64da7b07": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_9e9b4b5f047742e6be295e09dd689b22", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 494, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 494, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5b07fa553e0b4f4287e08a777c48b9b8" } }, "a82e6a3e16514901a11b2ad390dc8aec": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_3f96899ef3cf411ca85c32609f8b62c9", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 494/494 [00:00<00:00, 8.50kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": 
"IPY_MODEL_1d3ddc8c772943e08db555d7149bb673" } }, "a9832e3f455446c69d4a5ab5e370de24": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "3a2fe1d7aec647b7bc59114045435a81": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9e9b4b5f047742e6be295e09dd689b22": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": 
"1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "5b07fa553e0b4f4287e08a777c48b9b8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "3f96899ef3cf411ca85c32609f8b62c9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "1d3ddc8c772943e08db555d7149bb673": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", 
"_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "87c116214fbb4f188e8a8faa4a741207": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_f35d13e8523b44889a31ba1e53c68fa8", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_e6ab3d39db2c4b29a672173f05ad6d26", "IPY_MODEL_22ad65c969aa403283388ac972360187", "IPY_MODEL_d8b3c21741cb4bf690f6e718b4cc6f01" ] } }, "f35d13e8523b44889a31ba1e53c68fa8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": 
null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e6ab3d39db2c4b29a672173f05ad6d26": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_4fc621bcb1f64f8e935997af954e7082", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5dfe88fdecea45f4847230e69303e0e0" } }, "22ad65c969aa403283388ac972360187": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_dcaa5c449c6842c49a56bfb1d73fe8b9", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 862, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 862, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_320c7a8b18064524a97b937a1aeb02c3" } }, "d8b3c21741cb4bf690f6e718b4cc6f01": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", 
"model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_0dd496378d1c4506b779a5a3ea531c43", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 862/862 [00:00<00:00, 20.0kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b9a6ae35a2d8450ead8a3fa2deecfe48" } }, "4fc621bcb1f64f8e935997af954e7082": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "5dfe88fdecea45f4847230e69303e0e0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": 
null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "dcaa5c449c6842c49a56bfb1d73fe8b9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "320c7a8b18064524a97b937a1aeb02c3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0dd496378d1c4506b779a5a3ea531c43": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", 
"_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "b9a6ae35a2d8450ead8a3fa2deecfe48": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a641e9c5693e46298c3b9d2e6c3f4c38": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_6cf56d1eef0945e2a354470bf9153e85", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_a3eade920d7b4a8ba53c03c84093897d", "IPY_MODEL_7d02cfd8c8824f2d92525b5505150f93", "IPY_MODEL_b018c6a0e3ed40ce9210ad80deed1ae9" ] } }, "6cf56d1eef0945e2a354470bf9153e85": { "model_module": "@jupyter-widgets/base", 
"model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a3eade920d7b4a8ba53c03c84093897d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_de1b674460f841468cfe002415927347", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_41d2acf608a746a386a4afe6478f395e" } }, "7d02cfd8c8824f2d92525b5505150f93": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_1838d80c3f3e434291124531d308d200", "_dom_classes": [], "description": "", "_model_name": 
"FloatProgressModel", "bar_style": "success", "max": 209528, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 209528, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_31486fd26e074c7999ae603d5e527a64" } }, "b018c6a0e3ed40ce9210ad80deed1ae9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_1c340ae116394401b57da2934f82af10", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 205k/205k [00:00<00:00, 1.36MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2c955331f6c34eb380a2c7a9753b2ff4" } }, "de1b674460f841468cfe002415927347": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "41d2acf608a746a386a4afe6478f395e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": 
null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1838d80c3f3e434291124531d308d200": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "31486fd26e074c7999ae603d5e527a64": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1c340ae116394401b57da2934f82af10": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "2c955331f6c34eb380a2c7a9753b2ff4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a91211914d054738992f45436891234d": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": 
"HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_bc1cd1a174b4420b960c800fd8ef632c", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_6aa4216e3b8f416fad608846bb6c01d1", "IPY_MODEL_b72a598f5e8646708b20be7d461b2d6e", "IPY_MODEL_d58528d6f8434da8a066a0e5ef359090" ] } }, "bc1cd1a174b4420b960c800fd8ef632c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "6aa4216e3b8f416fad608846bb6c01d1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_57faa6d833494ea0af844c1303bc02e8", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", 
"_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_240b580080e14f8fa0d1beb8fb5d5673" } }, "b72a598f5e8646708b20be7d461b2d6e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_159e187fe57d49b7976e0c9b992e330b", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 112, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 112, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_885bf670c774498ca66b5d32455daa8f" } }, "d58528d6f8434da8a066a0e5ef359090": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_cfd84c37baf84863a7e00fdc262ceab1", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 112/112 [00:00<00:00, 2.75kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f6e1393810dd4307b75efb9822d02b1c" } }, "57faa6d833494ea0af844c1303bc02e8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "240b580080e14f8fa0d1beb8fb5d5673": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "159e187fe57d49b7976e0c9b992e330b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "885bf670c774498ca66b5d32455daa8f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": 
null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "cfd84c37baf84863a7e00fdc262ceab1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "f6e1393810dd4307b75efb9822d02b1c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, 
"grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "6902349aa5f7486a8d37d25d373569a1": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_7676d49f0e124f469122ea707c0c4086", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_202a1381fdb744b29a0cf9a7a3609060", "IPY_MODEL_02ce428f285c4eb39769102afc29eea9", "IPY_MODEL_c3eb72fbdc4e4d8980d094b15c0ddbb0" ] } }, "7676d49f0e124f469122ea707c0c4086": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": 
null, "display": null, "left": null } }, "202a1381fdb744b29a0cf9a7a3609060": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b7adf3af71c44312917c57b6bbd382eb", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_18928fee4c724a2c840b5cd77016302e" } }, "02ce428f285c4eb39769102afc29eea9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_d0b7be5bfa4444b5920c0ca4b031a579", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 433422856, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 433422856, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2caa09f106224aeebd611862d7a91b91" } }, "c3eb72fbdc4e4d8980d094b15c0ddbb0": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_64e4d22c5ed14556a38a15bbded24f8f", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 413M/413M [00:10<00:00, 40.5MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_945bc80296854fccb60294358c231261" } }, 
"b7adf3af71c44312917c57b6bbd382eb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "18928fee4c724a2c840b5cd77016302e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "d0b7be5bfa4444b5920c0ca4b031a579": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": 
"@jupyter-widgets/controls" } }, "2caa09f106224aeebd611862d7a91b91": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "64e4d22c5ed14556a38a15bbded24f8f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "945bc80296854fccb60294358c231261": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": 
null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "_hkUF5jx2Xxs" }, "source": [ "# Fast inference for Hugging Face task models on CPU (for example: QA model)" ] }, { "cell_type": "markdown", "metadata": { "id": "mDmIlOqh3Cyh" }, "source": [ "- **Author**: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)\n", "- **Version & Date** : v1 (10/22/2021)\n", "- **Blog post**: [NLP nas empresas | Técnicas para acelerar modelos de Deep Learning para inferência em produção](https://medium.com/@pierre_guillou/nlp-nas-empresas-t%C3%A9cnicas-para-acelerar-modelos-de-deep-learning-para-infer%C3%AAncia-em-produ%C3%A7%C3%A3o-884acbf49f20)\n", "- **Other notebook** : [fast_inference_transformers_on_GPU.ipynb](https://github.com/piegu/language-models/blob/master/fast_inference_transformers_on_GPU.ipynb)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "FREHU6cIboN8" }, "source": [ "## Notebook overview" ] }, { "cell_type": "markdown", "metadata": { "id": "vre_6Zjfbq_s" }, "source": [ "### Objective\n", "The objective of this notebook is to help those who want to **reduce inference time on CPU for Hugging Face task models** (NER, QA, Classification...).\n", "\n", "### Method for inference\n", "\n", "source: 
https://discuss.pytorch.org/t/model-eval-vs-with-torch-no-grad/19615/2\n", "\n", "- `model.eval()` will notify all your layers that you are in eval mode; that way, batchnorm or dropout layers will work in eval mode instead of training mode.\n", "- `torch.no_grad()` impacts the autograd engine and deactivates it. It will reduce memory usage and speed up computations but you won’t be able to backprop (which you don’t want in an eval script).\n", "\n", "### References\n", "- Blog post from HF and Microsoft: [Accelerate your NLP pipelines using Hugging Face Transformers and ONNX Runtime](https://medium.com/microsoftazure/accelerate-your-nlp-pipelines-using-hugging-face-transformers-and-onnx-runtime-2443578f4333) (19/05/2020)" ] }, { "cell_type": "markdown", "metadata": { "id": "DNmi0bXikgM3" }, "source": [ "## System overview" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "ov7T5Sk_pL_B", "outputId": "c985ae05-2def-4871-a5dd-08b93686d938" }, "source": [ "import platform\n", "platform.platform()" ], "execution_count": 1, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'Linux-5.4.104+-x86_64-with-Ubuntu-18.04-bionic'" ] }, "metadata": {}, "execution_count": 1 } ] }, { "cell_type": "markdown", "metadata": { "id": "VvdQxXRHiXHL" }, "source": [ "### CPU" ] }, { "cell_type": "code", "metadata": { "id": "3ViMgMkxklj7" }, "source": [ "from psutil import *" ], "execution_count": 2, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dBjiK-_tkntn", "outputId": "5a406a1e-80d4-4298-f2a6-f6ec15cc54b1" }, "source": [ "cpu_count(),cpu_stats()" ], "execution_count": 3, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(2,\n", " scpustats(ctx_switches=695139, interrupts=346474, soft_interrupts=359163, syscalls=0))" ] }, "metadata": {}, 
"execution_count": 3 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SnzYmVM4kuwl", "outputId": "f70a9c34-1d1c-4476-c1ea-8e51bcbb8e46" }, "source": [ "!cat /proc/cpuinfo" ], "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "processor\t: 0\n", "vendor_id\t: GenuineIntel\n", "cpu family\t: 6\n", "model\t\t: 79\n", "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n", "stepping\t: 0\n", "microcode\t: 0x1\n", "cpu MHz\t\t: 2199.998\n", "cache size\t: 56320 KB\n", "physical id\t: 0\n", "siblings\t: 2\n", "core id\t\t: 0\n", "cpu cores\t: 1\n", "apicid\t\t: 0\n", "initial apicid\t: 0\n", "fpu\t\t: yes\n", "fpu_exception\t: yes\n", "cpuid level\t: 13\n", "wp\t\t: yes\n", "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx smap xsaveopt arat md_clear arch_capabilities\n", "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa\n", "bogomips\t: 4399.99\n", "clflush size\t: 64\n", "cache_alignment\t: 64\n", "address sizes\t: 46 bits physical, 48 bits virtual\n", "power management:\n", "\n", "processor\t: 1\n", "vendor_id\t: GenuineIntel\n", "cpu family\t: 6\n", "model\t\t: 79\n", "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n", "stepping\t: 0\n", "microcode\t: 0x1\n", "cpu MHz\t\t: 2199.998\n", "cache size\t: 56320 KB\n", "physical id\t: 0\n", "siblings\t: 2\n", "core id\t\t: 0\n", "cpu cores\t: 1\n", "apicid\t\t: 1\n", "initial apicid\t: 1\n", "fpu\t\t: yes\n", "fpu_exception\t: yes\n", "cpuid level\t: 13\n", "wp\t\t: yes\n", "flags\t\t: fpu vme de pse tsc msr 
pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx smap xsaveopt arat md_clear arch_capabilities\n", "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa\n", "bogomips\t: 4399.99\n", "clflush size\t: 64\n", "cache_alignment\t: 64\n", "address sizes\t: 46 bits physical, 48 bits virtual\n", "power management:\n", "\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iU5N7WYs2Y9b", "outputId": "e272c4b5-4352-4990-a86d-9e4cce9c29a5" }, "source": [ "!df -h" ], "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Filesystem Size Used Avail Use% Mounted on\n", "overlay 108G 47G 62G 43% /\n", "tmpfs 64M 0 64M 0% /dev\n", "tmpfs 6.4G 0 6.4G 0% /sys/fs/cgroup\n", "shm 5.9G 0 5.9G 0% /dev/shm\n", "/dev/root 2.0G 1.2G 821M 59% /sbin/docker-init\n", "tmpfs 6.4G 28K 6.4G 1% /var/colab\n", "/dev/sda1 81G 51G 31G 63% /etc/hosts\n", "tmpfs 6.4G 0 6.4G 0% /proc/acpi\n", "tmpfs 6.4G 0 6.4G 0% /proc/scsi\n", "tmpfs 6.4G 0 6.4G 0% /sys/firmware\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "r7DSRg45kzqp", "outputId": "28c21be1-716f-49f3-e93b-bf0e5e604b41" }, "source": [ "virtual_memory()" ], "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "svmem(total=13622198272, available=12802056192, percent=6.0, used=543846400, free=10809655296, active=1039499264, inactive=1529974784, buffers=125227008, cached=2143469568, shared=1171456, slab=192012288)" ] }, "metadata": {}, "execution_count": 6 } ] }, { 
"cell_type": "markdown", "metadata": { "id": "xOuBqEynERFG" }, "source": [ "## Installation" ] }, { "cell_type": "code", "metadata": { "id": "P_sWC1bkETZw" }, "source": [ "%%capture\n", "!pip install transformers" ], "execution_count": 7, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OQxJyew3GOEg", "outputId": "aa15c843-b5a4-45dd-f883-14d329f3e170" }, "source": [ "import transformers, torch, numpy as np\n", "\n", "print(\"transformers:\",transformers.__version__)\n", "print(\"torch:\",torch.__version__)\n", "print(\"numpy:\",np.__version__)" ], "execution_count": 8, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "transformers: 4.11.3\n", "torch: 1.9.0+cu111\n", "numpy: 1.19.5\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "OFZpEH_scifg" }, "source": [ "from time import perf_counter\n", "def timer(f,*args): \n", " start = perf_counter()\n", " f(*args)\n", " return (1000 * (perf_counter() - start))" ], "execution_count": 9, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "U2VWWKpbR5mL" }, "source": [ "## QA model" ] }, { "cell_type": "markdown", "metadata": { "id": "vDfR4S0DcNoi" }, "source": [ "Model at https://huggingface.co/pierreguillou/bert-base-cased-squad-v1.1-portuguese" ] }, { "cell_type": "code", "metadata": { "id": "cBlOI89tR8Oq" }, "source": [ "model_checkpoint = \"pierreguillou/bert-base-cased-squad-v1.1-portuguese\"\n", "# model_checkpoint = \"pierreguillou/bert-large-cased-squad-v1.1-portuguese\"" ], "execution_count": 10, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "bclIDEVOSm-M" }, "source": [ "context = r\"\"\"\n", "A pandemia de COVID-19, também conhecida como pandemia de coronavírus, é uma pandemia em curso de COVID-19, \n", "uma doença respiratória aguda causada pelo coronavírus da síndrome respiratória aguda grave 2 (SARS-CoV-2). 
\n", "A doença foi identificada pela primeira vez em Wuhan, na província de Hubei, República Popular da China, \n", "em 1 de dezembro de 2019, mas o primeiro caso foi reportado em 31 de dezembro do mesmo ano. \n", "Acredita-se que o vírus tenha uma origem zoonótica, porque os primeiros casos confirmados \n", "tinham principalmente ligações ao Mercado Atacadista de Frutos do Mar de Huanan, que também vendia animais vivos. \n", "Em 11 de março de 2020, a Organização Mundial da Saúde declarou o surto uma pandemia. Até 8 de fevereiro de 2021, \n", "pelo menos 105 743 102 casos da doença foram confirmados em pelo menos 191 países e territórios, \n", "com cerca de 2 308 943 mortes e 58 851 440 pessoas curadas.\n", "\"\"\"\n", "\n", "question = \"Quando começou a pandemia de Covid-19 no mundo?\"" ], "execution_count": 11, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "QeiLPvAteK0q" }, "source": [ "## 1. Check the model" ] }, { "cell_type": "markdown", "metadata": { "id": "itx8-vxHgrgI" }, "source": [ "Before evaluating its inference time, let's check that our QA model is working well." 
] }, { "cell_type": "markdown", "metadata": { "id": "dUixO7_ueq38" }, "source": [ "#### 1.1 Without pipeline" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 177, "referenced_widgets": [ "485333784c724dcfb2b5a629d5b5df1d", "daf73dbf3e4d48d98d48395d8fc1282e", "03109d842c74499382bc8c3c8234cb3d", "4832e8ea4d234bc288fa768a64da7b07", "a82e6a3e16514901a11b2ad390dc8aec", "a9832e3f455446c69d4a5ab5e370de24", "3a2fe1d7aec647b7bc59114045435a81", "9e9b4b5f047742e6be295e09dd689b22", "5b07fa553e0b4f4287e08a777c48b9b8", "3f96899ef3cf411ca85c32609f8b62c9", "1d3ddc8c772943e08db555d7149bb673", "87c116214fbb4f188e8a8faa4a741207", "f35d13e8523b44889a31ba1e53c68fa8", "e6ab3d39db2c4b29a672173f05ad6d26", "22ad65c969aa403283388ac972360187", "d8b3c21741cb4bf690f6e718b4cc6f01", "4fc621bcb1f64f8e935997af954e7082", "5dfe88fdecea45f4847230e69303e0e0", "dcaa5c449c6842c49a56bfb1d73fe8b9", "320c7a8b18064524a97b937a1aeb02c3", "0dd496378d1c4506b779a5a3ea531c43", "b9a6ae35a2d8450ead8a3fa2deecfe48", "a641e9c5693e46298c3b9d2e6c3f4c38", "6cf56d1eef0945e2a354470bf9153e85", "a3eade920d7b4a8ba53c03c84093897d", "7d02cfd8c8824f2d92525b5505150f93", "b018c6a0e3ed40ce9210ad80deed1ae9", "de1b674460f841468cfe002415927347", "41d2acf608a746a386a4afe6478f395e", "1838d80c3f3e434291124531d308d200", "31486fd26e074c7999ae603d5e527a64", "1c340ae116394401b57da2934f82af10", "2c955331f6c34eb380a2c7a9753b2ff4", "a91211914d054738992f45436891234d", "bc1cd1a174b4420b960c800fd8ef632c", "6aa4216e3b8f416fad608846bb6c01d1", "b72a598f5e8646708b20be7d461b2d6e", "d58528d6f8434da8a066a0e5ef359090", "57faa6d833494ea0af844c1303bc02e8", "240b580080e14f8fa0d1beb8fb5d5673", "159e187fe57d49b7976e0c9b992e330b", "885bf670c774498ca66b5d32455daa8f", "cfd84c37baf84863a7e00fdc262ceab1", "f6e1393810dd4307b75efb9822d02b1c", "6902349aa5f7486a8d37d25d373569a1", "7676d49f0e124f469122ea707c0c4086", "202a1381fdb744b29a0cf9a7a3609060", "02ce428f285c4eb39769102afc29eea9", 
"c3eb72fbdc4e4d8980d094b15c0ddbb0", "b7adf3af71c44312917c57b6bbd382eb", "18928fee4c724a2c840b5cd77016302e", "d0b7be5bfa4444b5920c0ca4b031a579", "2caa09f106224aeebd611862d7a91b91", "64e4d22c5ed14556a38a15bbded24f8f", "945bc80296854fccb60294358c231261" ] }, "id": "v0Ewv1TleWqi", "outputId": "03642c39-f8c6-412a-b3fe-bbd01b0cb4cd" }, "source": [ "from transformers import AutoTokenizer, AutoModelForQuestionAnswering\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)\n", "model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)\n", "model.eval();" ], "execution_count": 12, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "485333784c724dcfb2b5a629d5b5df1d", "version_minor": 0, "version_major": 2 }, "text/plain": [ "Downloading: 0%| | 0.00/494 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Latency on CPU (ms)
Without pipeline889.07
With pipeline872.47
\n", "" ], "text/plain": [ " Latency on CPU (ms)\n", "Without pipeline 889.07\n", "With pipeline 872.47" ] }, "metadata": {}, "execution_count": 24 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 352 }, "id": "QfTf24EPMLAR", "outputId": "4df21df0-39aa-4fa5-e10c-60b812755540" }, "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "labels = ['CPU']\n", "data = [mean_time_cpu, pipeline_mean_time_cpu]\n", "\n", "fig = plt.figure()\n", "ax = fig.add_axes([0,0,1,1])\n", "\n", "X = np.arange(1)\n", "ax.bar(X - 0.1, data[0], color = 'r', width = 0.2, label='Without pipeline')\n", "ax.bar(X + 0.1, data[1], color = 'g', width = 0.2, label='With pipeline')\n", "\n", "# axes and title\n", "x = np.arange(len(labels)) # the label locations\n", "ax.set_xticks(x)\n", "ax.set_xticklabels(labels)\n", "ax.set_ylabel('Latency (ms)')\n", "ax.set_title('Inference latency of PyTorch model (CPU) in Google Colab')\n", "\n", "leg = ax.legend();" ], "execution_count": 25, "outputs": [ { "output_type": "display_data", "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeYAAAFPCAYAAACYgG3pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dedxUdd3/8ddHVhUEUbIQBMUVWRTBBTNJSdJMu1NTMTVvFa3c0Fy6b3+GZaWZe+VeYGpulZFpueJtobIkQkAqbmxqgIC4i3x/f5xzXQ4X1zJcMFwHr9fz8ZjHNed8v+ecz8yca95zlpkTKSUkSVIxrNfUBUiSpE8YzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwVxQEbFZRPxfRCyNiMuaup7GiIhREXFRU9dRKRHx7Yh4IyLejohNmrqeckVEioitC1DH4IiYU2bfkRFxaz3tbSJiekR8bs1VWG89p0bEJQ30eTsitlob9axJETE2Ik5oguXW+xrX6Pupfm8xmNeiiHglIoaU2X04sADYKKV0VgXLKoSmejNorIhoBVwO7JdSapdSWlijvUcegG/nt1ci4rwy5vt2yW15RLxXMnxUpR7Pp8Bw4P9SSq9VjYiIXSPi/ohYHBFvRsT4iDgubxucP79v5x9+n6vRttIHhhrr6I3AURHxmboKyteLlxrzYCKidURckNf1TkTMjYgHImK/xsxvbcnrHhkRL+R1vxIRv46IHk1d27rEYC6u7sD01IhfgImIlhWoRyvaDGgLTGugX8eUUjvgSOCCiPhyfZ3zN/N2+TSzgK+WjLutnMKa6et/MvDbqoGI2AN4FHgc2BrYBPg2sH/JNPPy53kj4FzgxojoVc7CUkrvAw8Ax6yR6ld2D3BwPv+NgS2Bq4CvVGh5a8o9wEHAMKAD0A+YBOzblEWtawzmJhIR34qIv0fEzyNiUUS8HBH7522jgGOBc/JP9EMiYr2IOC8iXoyIhRFxV0R0yvtXbZ0dHxGzyN6QiIj/jogZ+fz/FhHdS5afIuLk/JPt4oj4ZURESfuJ+bRL812E/fPxXSLi9xExP6/5tDIf78YRcV8+3aL8fte87cfAXsAv8sf7i3z89hHxUL6181xEfKNkfqPymv+S1/h0RPQsad+xZNo3IuJ/IuKzEfFulOx2joj+eU2taqm5TURcGRHz8tuV+bhtgefybosj4tGGHn9K6UmyEO+d173C4YmIGBMRI+p5/mqtJW8bHBFzIuLciHgd+E1EtMgf84v58zMpIrqVzHJIXa99jeWOjIi7I+LWfD5TI2LbiPh+RPwnImaXbsXl68eY/HmfGREnlrStn79uiyJiOjCwxrIau25tAWwFPF0y+lJgdErpkpTSgpSZlFL6Rs3p87Z7gUVAWcGcG0s9QRklhwwaWl9rTDcE+BJwcErp6ZTSh/ntryml00v67RDZVvziiJgWEQeVtHWIiFvy5/LViDg/ItbL21pExGURsSB/nk/Ja631A13U8z5ST90TUkrLUkpLUkq/TCndnPepc/2oZX53R8TrEbEkssN6O9bosmn+P740Ih6vq651UkrJ21q6Aa8AQ/L73wI+Ak4EWpB9mp8HRN4+CrioZNrTgaeArkAb4Hrgd3lbDyABtwAbAuuTfdqeCewAtATOB8aVzC8B9wEdgS2A+cCX87bDgLlkb5xBtsXRneyD3CTgAqA12ZvhS8DQOh5v9WMg22I5BNgAaA/cDdxb0ncscELJ8IbAbOC4vP6dyXbt9yqZ90Jg17z9NuCOvK098BpwFtlWbXtgt7ztfuDbJcu5Arimjvp/mD/nnwE6A+OAH9V4zlvWMW11e/4c7gm8S7blsGv+Wq+X9900b9usnvWlvloGA8uAS/J1Y33gbGAqsF2+/H7AJg299rU8jpHA+8DQ/LHcArwM/C/Qimz9fbmk//8Bv8qf953yee+Tt10MPAF0AroB/wLm5G31rlt5HbfWUeN
XgGklwxsAHwNfrOd/cXCNZf8X2f/jdqVtNaYZy4rraH/gzXqWkYCtG1pfa5nuYmBsA+8lrcj+v/8nf772AZYC2+XttwB/Ilv3ewDPA8fnbScD08neSzYGHqZkXS59nDTwPlJL3Y83UHd968cKrzHw33n9bYArgck13luWAl/I268C/r6679FFuTV5Ac3pxsrBPLOkbYP8n+Oz+fAoVgzmGcC+JcOfy99IWvJJCGxV0v5A1T9iPrwe2Zt/93w4AZ8vab8LOC+//zfg9Frq3w2YVWPc94Hf1PF4V3gMNdp2AhaVDFe/GeTDhwNP1JjmeuAHJfO+qaTtAODf+f0jgWfqWO7hwD/y+y2A14Fd6+j7InBAyfBQ4JX8ftVz3lAwLybbEpsBnFbj9fxSfv8U4P4G1pf6ahkMfAi0LWl/jmzLpbba6nzta+k7EnioZPirwNtAi3y4fT6/jmRh+zHQvqT/T4FR+f2XKPkAQHZcuCoc6123qD+YjwKeKhnePK9p+3r+FwcDy/PX501gMnBESVs5wbwN8HE9y6gZzLWur7VMdxMloU32QWYxsAR4Px+3F9m6u15Jv9/lz1OLfH3oVdJ2EnnYk+1RO6mkbQh1B3O97yM16r6ROj5s5O0NrR/1vcYd8xo7lDyfpc9Ru3ze3epa/rp0a47Hoork9ao7KaV3872J7ero2x34Y0QsLxn3Mdmxziqza/S/qsYu0yB703q15vLJ/tmqlt2NLAhqq6FLRCwuGdeCbCuoXhGxAdnW6ZfJPqUDtI+IFimlj+tY1m41ltWSkuOIjagfsq2I6yJiS7KtoyUppfF19O3CJ88V+f0udfSty6YppWW1jB8NfBN4KP97VQPzaaiW+Sk77lmlvucA6n7uavNGyf33gAUlr9l7+d92eT1vppSW1qhzQMljmF2jrUqj1y2yDz7tawwvJ/vw+u96ppuXUupay/hlZFukNbUi+zBcpT1ZWJar3Od8IVnoA5BSehPomO8WfyEf3QWYnVIqfT94lez/e9O81prry+al05a0ld6vqZz3kdK6t61nXg2tH58sIKIF8GOyvXedyV5PyB5b1XNeXXdK6e2IeJOVH9s6yWPM647ZwP4ppY4lt7YppbklfVKN/ifV6L9+Smlcmcuq7fjXbLLdlqXzbJ9SOqCMeZ5FFoS7pZQ2ItsFBdk/ec3aq5b1eI1ltUspfbvM+mv9mkoeXneRheHRrBj0Nc0je2OqskU+bk24FTg4IvqR7Sa8t4H+DdVS2/NX6zHMCpoHdIqI0pDcguywCGSHF7rVaKuyOuvWFGDLqmOkKaV3gSfJDp00xiyy45fVwZkfg+/OimG0A/BsI5dRn0eAgZGfg1GHeUC3quPGuarnegHZB4ia60vp61A679LXpKZVeR95GNi1nrobWj9KDSPbjT6E7CSyHvn40nMhquvOX6tOrLn/zyZlMK87rgN+XHWCQ0R0joiDG+j//aoTJvKTQQ4rc1k3Ad+LiF0is3W+3PHA0vwko/Xzk0h6R8TABuYH2dbFe2QnS3UCflCj/Q1WDNP7gG0j4uiIaJXfBkbEDmUs6z7gcxFxRmQnTbWPiN1K2m8hO5RwEPUH8++A8/PnelOy459lfc+yISmlOcCEfPm/Tym918Akq1rLTcCPImKb/DXsGxX+rnVKaTbZse+fRkTbiOgLHF9S511k6+TG+Zv3qSWTN3rdyp/LmWTHb6ucA3wrIs6uetwR0S8i7ihjfrPITiS7JCLaRXaS3dlkYfdUSde9yXb1rlEppQeBx4B7I2K3yL6C1ArYvaTb02Rb3efk/xuDyQ4z3JHvzbiL7P2iff6/eyYrvg6nR8TmEdGR7Iz0upT9PpJSephsD9Af8/eOlvnyT46I/y5j/SjVHviAbCt8A+AntfQ5ICI+HxGtgR+RHc5Y57eWwWBel1wFjAEejIilZG8Qu9XVOaX0R7KTge6IiLfITrTZv67+Naa9m2w30u1kJ1jcC3TK/+EPJDs+/DLZJ/O
byD7RNuRKspOSFuS1/7WWx3dofubn1fnurv2AI8g+Bb/OJyc3NVT/UrKzQ7+aT/cC8MWS9n+Q7Rr7Z0qp5u64UhcBE8m2yKYC/8zHrSmjgT7U/+GgsbVcTvYG/CDwFnAz2fNfaUeSbd3MA/5Idk7Aw3nbhWRbnC/ndVU/7tVctyA7/+DokvmNIzshah/gpXw35w1kJ/+V43CyE+1mkm3R7Qt8pepwQUS0JTtOPLrM+a2q/yL7gHkr2fHll8mOpQ8FSCl9SLZ+70/2XP0KOCalVLXr/lTgHbLj+n8n+1/+dd52I9nzPwV4huw5WUZ2aGwFjXgfOTSf351ku5z/RbarumodqG/9KHUL2boyl+xEtadq6XM72Qf8N4FdyPaCfSpUnQEsNSuRfcXp9pTSTU1YwxfI3ni7J/8RV0u+VfsM2QmSrzXUfw0s71SyE43OqfSyKi2yr2lel1Lq3mBnrRUGs5qdfPfoQ2RvrEsb6l+hGloBdwDPppR+2BQ1qHmKiPXJ9iA9SHby6O/JdgOf0aSFqZq7stWsRMRost1qZzRhKO9Atnvyc2S7+KW1KcgOKywi28swg+ycBRWEW8ySJBWIW8ySJBWIwSxJUoGs07/8temmm6YePXo0dRmSJK2SSZMmLUgpda6tbZ0O5h49ejBx4sSmLkOSpFUSEXX+hoK7siVJKhCDWZKkAjGYJUkqkHX6GLMkfZp89NFHzJkzh/fff7/hzlontG3blq5du9KqVW1XEq2dwSxJBTFnzhzat29Pjx49yK/PrnVYSomFCxcyZ84cttxyy7Knc1e2JBXE+++/zyabbGIof0pEBJtssskq7wExmCWpQAzlT5fGvJ4GsyQJgBEjRnDllZ9cV2Xo0KGccMIJ1cNnnXUWl19+OWPGjOHiiy8G4N5772X69OnVfQYPHrzGfl/iJz/5ySpPc8EFF/Dww7Vd4rlho0aN4pRTTgHguuuu45ZbbmnUfFaXwSxJRRWxZm8N2HPPPRk3bhwAy5cvZ8GCBUybNq26fdy4cQwaNIiDDjqI8847D1g5mNekxgTzD3/4Q4YMGbLayz755JM55phjVns+jWEwS5IAGDRoEE8++SQA06ZNo3fv3rRv355FixbxwQcfMGPGDPr371+9ZTlu3DjGjBnD2WefzU477cSLL74IwN13382uu+7KtttuyxNPPAFkx8+PO+44+vTpw84778xjjz0GrLiVCnDggQcyduxYzjvvPN577z122mknjjrqqJVqbdeuHSNGjGDHHXdk3333Zf78+QB861vf4p577gGyX4c855xz6NOnD7vuuiszZ84EYP78+RxyyCEMHDiQgQMH8o9//GOl+Y8cOZKf//znQLYX4Nxzz13pMX388cecffbZDBw4kL59+3L99dev/ouAwSxJynXp0oWWLVsya9Ysxo0bxx577MFuu+3Gk08+ycSJE+nTpw+tW7eu7l+19XzppZcyefJkevbsCcCyZcsYP348V155JRdeeCEAv/zlL4kIpk6dyu9+9zuOPfbYek+Kuvjii1l//fWZPHkyt91220rt77zzDgMGDGDatGnsvffe1cupqUOHDkydOpVTTjmFM844A4DTTz+dESNGMGHCBH7/+9+vsLu+LrU9pptvvpkOHTowYcIEJkyYwI033sjLL7/c4Lwa4telJEnVBg0axLhx4xg3bhxnnnkmc+fOZdy4cXTo0IE999yzrHl8/etfB2CXXXbhlVdeAeDvf/87p556KgDbb7893bt35/nnn290neuttx6HH344AN/85jerl1nTkUceWf13xIgRADz88MMr7H5/6623ePvtt1f5MT344INMmTKlegt9yZIlvPDCC6v01ajaGMylPBtS+kRKTV2BmkDVceapU6fSu3dvunXrxmWXXcZGG23EcccdV9Y82rRpA0CLFi1YtmxZvX1btmzJ8uXLq4cb++MqdZ39XDq+6v7y5ct56qmnaNu2bdnzr+0xpZS45pprGDp0aKNqrou
7siVJ1QYNGsR9991Hp06daNGiBZ06dWLx4sU8+eSTDBo0aKX+7du3Z+nSpQ3Od6+99qreJf38888za9YstttuO3r06MHkyZNZvnw5s2fPZvz48dXTtGrVio8++qjW+S1fvrx6S/X222/n85//fK397rzzzuq/e+yxBwD77bcf11xzTXWfyZMnN1h/bYYOHcq1115bXePzzz/PO++806h5lTKYJUnV+vTpw4IFC9h9991XGNehQwc23XTTlfofccQRXHrppey8887VJ3/V5jvf+Q7Lly+nT58+HH744YwaNYo2bdqw5557suWWW9KrVy9OO+00+vfvXz3N8OHD6du3b60nf2244YaMHz+e3r178+ijj3LBBRfUutxFixbRt29frrrqKq644goArr76aiZOnEjfvn3p1asX1113XdnPT6kTTjiBXr160b9/f3r37s1JJ53U4B6CckRah3dXDRgwIK3R6zG7K1v6xDr83rCumjFjBjvssENTl7FOaNeuXYPHhXv06MHEiRNr/UCxNtX2ukbEpJTSgNr6u8UsSVKBePKXpFrFhe5BWtse2O8B3pm3+scom4Oxz49l4rz695jeM+4eXvnwFV6Z98pqL29Al1o3bivCLWZJkgrEYJYkqUAMZkmSCsRgliSpQAxmSRIAl//gcm6/8fbq4VOHncpF37uoeviKC6/gtutv4/EHH2fUL0YBMPavY3np+Zeq+5x06ElMf7bxV5s6/ejTWbqk4R8sqc3IM0byyH2PAHDR9y5aoa51iWdlS1JBDbxx4Bqd34QTJ9Tb3m9gPx7+88NwYvbLWovfXMw7Sz85S3zqxKmMGDmCPrv0Ye/99gayYN5ryF5ste1Wa6TGq3571RqZz/k/P3+NzKcpuMUsSQKg74C+TJk0BYCXnnuJntv1ZIN2G/DW4rf48IMPeXnmy2zfZ3v+fOef+dn//oxnJzzLEw89wdUXXc2wLw1jzitzAHjkvkc49ivHcsjnD+GZp59ZaTmTxk1i+NeHc8bRZ3DIXofw03N/Wv172QftdhCL31zMvNnzOPQLh3L+Kedz2N6Hce6J5/L+e9nvaM+YMoPhhwzn6C8fzanDTmXBGwtWWkbplvsXtvkCv7r4VwwbMozjDjyOhfMXArBo4SLOOfEcjjngGI454BienfDsmn9SG8FgliQB0PmznWnZsiWvz32dKROn0GeXPuy4845MnTSVGVNmsPX2W9Oqdavq/v0G9mOvL+3Faeefxu0P3U7XHl2B7BKJo/8ymjMvPJMbL7+x1mVNmzyN7130Pe4aexdzXp3DY/c/tlKfV198lcOOPYy7H7+bDdtvyN2j72bZR8u49PxLueSGS/jtX3/LVw//Kr+65Ff1Pq733n2P3v17c/vDt7Pz7jtz7233AnDZBZcx7MRh3HL/Lfzsxp+tsNu+KbkrW5JUrc+APkyZOIUpE6cwbPgw5r8+nykTp9Buo3b0Hdi3rHnsc8A+AGzfd3tem/NarX123GlHunbPgnzo14Yyefxk9j1w3xX6bNZlM/oN7AfA/l/fnzt/fSd7DN6Dl557ie8e8V0g2+W+6Wfq/8nNVq1bsdeX9spq6rM945/ILpQx/onxKxyHfuftd3j3nXfZYMMNynqclWIwS5Kq9RvQjykTpzDz3zPpuX1PNuuyGbdefyvt2rfjwMMPLGseVVvVLVq04ONlH9feqcYPy9V22caa4yICEmy17Vb8+s+/LqsWyC4tWTWv0ss2Ll++nN/8+Te0adum7HmtDe7KliRV6zugL088/AQbddyIFi1a0GHjDrz91ttMmTSFfgP6rdR/w3YbNupSh9MnT2furLksX76ch8Y8xE677rRSn6pd6gB/u/dv9BvYj+49u7PozUXV45d9tIwXn6v7qlb12X3v3bnrN3dVDz/3r+caNZ81zWCWJFXbeoetWfLmEvr071M9ruf2PWnXvh0dO3Vcqf9+B+/HrdfeylH7HVV98lc5evXrxaX/eymH7X0YXbp
1YfD+g1fq071nd+4efTeH7X0Yby15i0OPPZRWrVtx8fUX84uf/IJhQ4YxbL9h1SG9qr73o+8x/dnpHDnkSL4x+Bv84bd/aNR81jQv+1jKyz5K1WJkU1fQ/Dyw3wNs2r1pL1G4NkwaN4lbr7uVK265os4+82bPY8SxI7jz0TvXYmV1W52LWHjZR0mS1mGe/CVJWqt2GbQLuwzapd4+Xbp1KczW8trmFrMkSQViMEtSQSxnOay7p/2oFo05j6uiwRwRIyJiWkT8KyJ+FxFtI2LLiHg6ImZGxJ0R0Trv2yYfnpm396hkbZJUNDPfmsmyd5YZzp8SKSUWLlxI27ZtV2m6ih1jjojNgdOAXiml9yLiLuAI4ADgipTSHRFxHXA8cG3+d1FKaeuIOAK4BDi8UvVJUtGM/OdIRjKSrTfamvXcoVkoM5bMaNR0bdu2pWvXrqs0TaVP/moJrB8RHwEbAK8B+wDD8vbRwEiyYD44vw9wD/CLiIi0Ln+fS5JWwaIPF3H6U6c3dRmqRfrB2ouiin0kSynNBX4OzCIL5CXAJGBxSmlZ3m0OsHl+f3Ngdj7tsrz/JpWqT5KkIqpYMEfExmRbwVsCXYANgS+vgfkOj4iJETFx/vz5qzs7SZIKpZIHMYYAL6eU5qeUPgL+AOwJdIyIql3oXYG5+f25QDeAvL0DsLDmTFNKN6SUBqSUBnTu3LmC5UuStPZVMphnAbtHxAaRXdZjX2A68BhwaN7nWOBP+f0x+TB5+6MeX5YkNTeVPMb8NNlJXP8EpubLugE4FzgzImaSHUO+OZ/kZmCTfPyZwHmVqk2SpKKq6FnZKaUfAD+oMfolYNda+r4PHFbJeiRJKjq/KCdJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBGMySJBWIwSxJUoEYzJIkFYjBLElSgRjMkiQViMEsSVKBVDSYI6JjRNwTEf+OiBkRsUdEdIqIhyLihfzvxnnfiIirI2JmREyJiP6VrE2SpCKq9BbzVcBfU0rbA/2AGcB5wCMppW2AR/JhgP2BbfLbcODaCtcmSVLhVCyYI6ID8AXgZoCU0ocppcXAwcDovNto4Gv5/YOBW1LmKaBjRHyuUvVJklREldxi3hKYD/wmIp6JiJsiYkNgs5TSa3mf14HN8vubA7NLpp+Tj5MkqdmoZDC3BPoD16aUdgbe4ZPd1gCklBKQVmWmETE8IiZGxMT58+evsWIlSSqCSgbzHGBOSunpfPgesqB+o2oXdf73P3n7XKBbyfRd83ErSCndkFIakFIa0Llz54oVL0lSU6hYMKeUXgdmR8R2+ah9genAGODYfNyxwJ/y+2OAY/Kzs3cHlpTs8pYkqVloWeH5nwrcFhGtgZeA48g+DNwVEccDrwLfyPveDxwAzATezftKktSsVDSYU0qTgQG1NO1bS98EfLeS9UiSVHT+8pckSQViMEuSVCAGsyRJBWIwS5JUIAazJEkF0uBZ2RGxHtkFKLoA7wH/Sin9p/6pJElSY9QZzBHREzgXGAK8QPa7122BbSPiXeB6YHRKafnaKFSSpOagvi3mi8guvXhS/h3jahHxGWAYcDSfXClKkiStpjqDOaV0ZD1t/wGurEhFkiQ1Yw2e/BURh0VE+/z+/4uIP0RE/8qXJklS81POWdn/L6W0NCI+T/ZTmjeT7eKWJElrWDnB/HH+9yvADSm
lvwCtK1eSJEnNVznBPDcirgcOB+6PiDZlTidJklZROQH7DeBvwNCU0mKgE3B2RauSJKmZavAHRlJK70bEY0C3kpO+FlS2LEmSmqdyfvnrR8C3gBeBqu8zJ2CfypUlSVLz1GAwk+3K7plS+rDSxUiS1NyVc4z5X0DHShciSZLK22L+KfBMRPwL+KBqZErpoIpVJUlSM1VOMI8GLgGmAl6wQpKkCionmN9NKV1d8UokSVJZwfxERPwUGMOKu7L/WbGqJElqpsoJ5p3zv7uXjPPrUpIkVUA5PzDyxbVRiCRJqufrUhHxzYior71nfsUpSZK0htS3xbwJ2dekJgGTgPlAW2BrYG+yn+U8r+IVSpLUjNQZzCmlqyLiF2THkvcE+gLvATOAo1NKs9ZOiZIkNR/1HmNOKX0MPJTfJElShXldZUmSCsRgliSpQBoM5ohosTYKkSRJ5W0xvxARl0ZEr4pXI0lSM1dOMPcDngduioinImJ4RGxU4bokSWqWGgzmlNLSlNKNKaVBwLnAD4DXImJ0RGxd8QolSWpGyjrGHBEHRcQfgSuBy4CtgD8D91e4PkmSmpVyLmLxAvAYcGlKaVzJ+Hsi4guVKUuSpOapnGDum1J6u7aGlNJpa7geSZKatXJO/vplRHSsGoiIjSPi1xWsSZKkZqucYO6bUlpcNZBSWsQn12iWJElrUDnBvF5EbFw1EBGdKG8XuCRJWkXlBOxlwJMRcTcQwKHAjytalSRJzVSDwZxSuiW/JvMX81FfTylNr2xZkiQ1T+Xukv43sKiqf0Rs4fWYJUla8xoM5og4lezXvt4APibbnZ2AvpUtTZKk5qecLebTge1SSgsrXYwkSc1dOWdlzwaWVLoQSZJU3hbzS8DYiPgL8EHVyJTS5RWrSpKkZqqcYJ6V31rnN0mSVCHlfF3qQoCI2CCl9G7lS5Ikqfkq57KPe0TEdLKvTBER/SLiVxWvTJKkZqick7+uBIYCCwFSSs8CXu5RkqQKKCeYSSnNrjHq4wrUIklSs1fOyV+zI2IQkCKiFdn3mmdUtixJkpqncraYTwa+C2wOzAV2Ar5T7gIiokVEPBMR9+XDW0bE0xExMyLujIjW+fg2+fDMvL3Hqolpx+sAAAfVSURBVD4YSZLWdeUE83YppaNSSpullD6TUvomsMMqLKPmFvYlwBUppa3Jfn/7+Hz88cCifPwVeT9JkpqVcoL5mjLHrSQiugJfAW7KhwPYB7gn7zIa+Fp+/+B8mLx937y/JEnNRp3HmCNiD2AQ0Dkizixp2ghoUeb8rwTOAdrnw5sAi1NKy/LhOWS7yMn/zgZIKS2LiCV5/wU16hoODAfYYostyixDkqR1Q31bzK2BdmTh3b7k9hZwaEMzjogDgf+klCatgTqrpZRuSCkNSCkN6Ny585qctSRJTa7OLeaU0uPA4xExKqX0aiPmvSdwUEQcALQl29K+CugYES3zreauZCeUkf/tBsyJiJZAB/LvTkuS1FyUc4z53Yi4NCLuj4hHq24NTZRS+n5KqWtKqQdwBPBoSuko4DE+2eI+FvhTfn9MPkze/mhKKa3Kg5EkaV1XTjDfRvZznFsCFwKvABNWY5nnAmdGxEyyY8g35+NvBjbJx58JnLcay5AkaZ1Uzg+MbJJSujkiTi/Zvb1KwZxSGguMze+/BOxaS5/3gcNWZb6SJH3alBPMH+V/X4uIrwDzgE6VK0mSpOarnGC+KCI6AGeRfX95I+CMilYlSVIzVc71mO/L7y4BvggQEQazJEkVUNbVpWpxZsNdJEnSqmpsMPtTmZIkVUBjg9nvF0uSVAH1/Vb2UmoP4ADWr1hFkiQ1Y/X9JGf7utokSVJlNHZXtiRJqgCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAjG
YJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAIxmCVJKhCDWZKkAqlYMEdEt4h4LCKmR8S0iDg9H98pIh6KiBfyvxvn4yMiro6ImRExJSL6V6o2SZKKqpJbzMuAs1JKvYDdge9GRC/gPOCRlNI2wCP5MMD+wDb5bThwbQVrkySpkCoWzCml11JK/8zvLwVmAJsDBwOj826jga/l9w8GbkmZp4COEfG5StUnSVIRrZVjzBHRA9gZeBrYLKX0Wt70OrBZfn9zYHbJZHPycZIkNRsVD+aIaAf8HjgjpfRWaVtKKQFpFec3PCImRsTE+fPnr8FKJUlqehUN5ohoRRbKt6WU/pCPfqNqF3X+9z/5+LlAt5LJu+bjVpBSuiGlNCClNKBz586VK16SpCZQybOyA7gZmJFSurykaQxwbH7/WOBPJeOPyc/O3h1YUrLLW5KkZqFlBee9J3A0MDUiJufj/ge4GLgrIo4HXgW+kbfdDxwAzATeBY6rYG2SJBVSxYI5pfR3IOpo3reW/gn4bqXqkSRpXeAvf0mSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQgBrMkSQViMEuSVCAGsyRJBWIwS5JUIAazJEkFYjBLklQghQrmiPhyRDwXETMj4rymrkeSpLWtMMEcES2AXwL7A72AIyOiV9NWJUnS2lWYYAZ2BWamlF5KKX0I3AEc3MQ1SZK0VhUpmDcHZpcMz8nHSZLUbLRs6gJWVUQMB4bng29HxHNNWY/0qTWSTYEFTV2GVAQxMtb0LLvX1VCkYJ4LdCsZ7pqPW0FK6QbghrVVlNRcRcTElNKApq5Dam6KtCt7ArBNRGwZEa2BI4AxTVyTJElrVWG2mFNKyyLiFOBvQAvg1ymlaU1cliRJa1WklJq6BkkFFBHD80NHktYig1mSpAIp0jFmSZKaPYNZaoYi4rMRcUdEvBgRkyLi/ojYNiLei4jJETE9Iq6LiPUiYnBE3Fdj+lERcWhT1S99mhXm5C9Ja0dEBPBHYHRK6Yh8XD9gM+DFlNJOEdESeBT4GvBmkxUrNUNuMUvNzxeBj1JK11WNSCk9S8kv76WUlgHjgK3XfnlS82YwS81Pb2BSfR0iYgNgX2DqWqlIUjWDWVKpnhExGfgH8JeU0gNAXV/d8CsdUgV4jFlqfqYBdZ249WJKaaca4xYCG9cY1wl/R1uqCLeYpebnUaBNfkEYACKiLyv+Vn2pF4AuEbFD3rc70A+YXOlCpebILWapmUkppYj4L+DKiDgXeB94BTijjv4fRMQ3gd9ERFvgI+CElNKStVWz1Jz4y1+SJBWIu7IlSSoQg1mSpAIxmCVJKhCDWZKkAjGYJUkqEINZkqQCMZglSSoQg1mSpAL5/yDB6kkmydk3AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" } } ] }, { "cell_type": "markdown", "metadata": { "id": "kIWlIBvkMLAS" }, "source": [ "Pipeline does not help improve latency on CPU." ] }, { "cell_type": "markdown", "metadata": { "id": "s6ZTYbyBB9F8" }, "source": [ "## 3. Inference time | ONNX Runtime" ] }, { "cell_type": "markdown", "metadata": { "id": "6WuzVpniCACF" }, "source": [ "[ONNX Runtime](https://onnxruntime.ai/) helps **accelerate PyTorch and TensorFlow models in production, on CPU or GPU**. As an open source library built for performance and broad platform support, ONNX Runtime is used in products and services handling over 20 billion inferences each day. " ] }, { "cell_type": "markdown", "metadata": { "id": "n9AcUibxJGXt" }, "source": [ "### 3.1 Old method | convert_graph_to_onnx.py" ] }, { "cell_type": "markdown", "metadata": { "id": "G6AD86PrI99o" }, "source": [ "You can use ONNX Runtime and Hugging Face Transformers together to improve the experience of training and deploying NLP models. Hugging Face has made it easy to inference Transformer models with ONNX Runtime with the [transformers/convert_graph_to_onnx.py](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_graph_to_onnx.py) which generates a model that can be loaded by ONNX Runtime." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "691vn7sAGypy", "outputId": "e29eb83b-952e-47a0-de37-798f0b148849" }, "source": [ "!python -m transformers.onnx --help" ], "execution_count": 26, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "usage: Hugging Face ONNX Exporter tool [-h] -m MODEL\n", " [--feature {causal-lm,causal-lm-with-past,default,default-with-past,masked-lm,seq2seq-lm,seq2seq-lm-with-past,sequence-classification,sequence-classification-with-past,token-classification}]\n", " [--opset OPSET] [--atol ATOL]\n", " output\n", "\n", "positional arguments:\n", " output Path indicating where to store generated ONNX model.\n", "\n", "optional arguments:\n", " -h, --help show this help message and exit\n", " -m MODEL, --model MODEL\n", " Model's name of path on disk to load.\n", " --feature {causal-lm,causal-lm-with-past,default,default-with-past,masked-lm,seq2seq-lm,seq2seq-lm-with-past,sequence-classification,sequence-classification-with-past,token-classification}\n", " Export the model with some additional feature.\n", " --opset OPSET ONNX opset version to export the model with (default\n", " 12).\n", " --atol ATOL Absolute difference tolerence when validating the\n", " model.\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "jpByQiFAeOoY" }, "source": [ "````\n", "SUPPORTED_PIPELINES = [\n", " \"feature-extraction\",\n", " \"ner\",\n", " \"sentiment-analysis\",\n", " \"fill-mask\",\n", " \"question-answering\",\n", " \"text-generation\",\n", " \"translation_en_to_fr\",\n", " \"translation_en_to_de\",\n", " \"translation_en_to_ro\",\n", "]\n", "````" ] }, { "cell_type": "markdown", "metadata": { "id": "MIq7eMLLdBWf" }, "source": [ "Get the file convert_graph_to_onnx.py" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "GrSHTpBILZPd", "outputId": "04b9e71e-4591-4e9b-ec3f-7a0206bbdb5e" }, "source": [ "!wget 
https://raw.githubusercontent.com/huggingface/transformers/master/src/transformers/convert_graph_to_onnx.py" ], "execution_count": 27, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2021-10-23 16:56:09-- https://raw.githubusercontent.com/huggingface/transformers/master/src/transformers/convert_graph_to_onnx.py\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 18640 (18K) [text/plain]\n", "Saving to: ‘convert_graph_to_onnx.py’\n", "\n", "\rconvert_graph_to_on 0%[ ] 0 --.-KB/s \rconvert_graph_to_on 100%[===================>] 18.20K --.-KB/s in 0s \n", "\n", "2021-10-23 16:56:10 (43.5 MB/s) - ‘convert_graph_to_onnx.py’ saved [18640/18640]\n", "\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "rtxn_TnPUqYZ" }, "source": [ "### 3.1.1 Tokenize the inputs" ] }, { "cell_type": "code", "metadata": { "id": "371tF1SsYhwM" }, "source": [ "from transformers import AutoTokenizer\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)" ], "execution_count": 28, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oDusFXaeUqYa", "outputId": "91bee422-0270-4b47-efaf-0f87158826af" }, "source": [ "num=100\n", "\n", "total = 0\n", "for i in range(num):\n", " start = perf_counter()\n", " # WARNING!!!!!!! 
return_tensors=\"np\" and not return_tensors=\"pt\"\n", " inputs = tokenizer(question, context, add_special_tokens=True, return_tensors=\"np\")\n", " diff = perf_counter() - start\n", " total += diff\n", "\n", "onnx_mean_tokenizer = round((total/num)*1000,2)\n", "print(f'average time: {onnx_mean_tokenizer} ms')" ], "execution_count": 29, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "average time: 0.71 ms\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "meVmR4E6L3va" }, "source": [ "### 3.1.2 Model on CPU" ] }, { "cell_type": "markdown", "metadata": { "id": "Ce7NwpoITBr3" }, "source": [ "#### Installation" ] }, { "cell_type": "code", "metadata": { "id": "arWxHIgrJrqq" }, "source": [ "%%capture\n", "# onnxruntime cpu\n", "!pip install onnx\n", "!pip install onnxruntime" ], "execution_count": 30, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "JyCazZPzMdFs", "outputId": "6aef6e85-348e-48dc-bf8d-9b1745c5045a" }, "source": [ "import onnxruntime as ort\n", "ort.get_device()" ], "execution_count": 31, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'CPU'" ] }, "metadata": {}, "execution_count": 31 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pfevyxkXdopX", "outputId": "fc6a6eac-c8e4-47f2-cfa2-d770389b4711" }, "source": [ "import onnxruntime\n", "print(\"onnxruntime:\",onnxruntime.__version__)" ], "execution_count": 32, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "onnxruntime: 1.9.0\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "Q7JbUW_0dE53" }, "source": [ "#### Convert the transformer model to its quantized onnx version" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ASCjNxFpJosm", "outputId": 
"b446fa9e-9b1b-4b69-8f58-8fa8f064912b" }, "source": [ "model_checkpoint_onnx = 'onnx_cpu/' + model_checkpoint.replace('/','-') + '.onnx'\n", "\n", "!python convert_graph_to_onnx.py \\\n", "--pipeline question-answering \\\n", "--model {model_checkpoint} \\\n", "--tokenizer {model_checkpoint} \\\n", "--framework pt \\\n", "--opset 11 \\\n", "--check-loading \\\n", "--use-external-format \\\n", "--quantize \\\n", "{model_checkpoint_onnx}" ], "execution_count": 33, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "====== Converting model to ONNX ======\n", "ONNX opset version set to: 11\n", "Loading pipeline (model: pierreguillou/bert-base-cased-squad-v1.1-portuguese, tokenizer: pierreguillou/bert-base-cased-squad-v1.1-portuguese)\n", "Creating folder /content/onnx_cpu\n", "Using framework PyTorch: 1.9.0+cu111\n", "Found input input_ids with shape: {0: 'batch', 1: 'sequence'}\n", "Found input token_type_ids with shape: {0: 'batch', 1: 'sequence'}\n", "Found input attention_mask with shape: {0: 'batch', 1: 'sequence'}\n", "Found output output_0 with shape: {0: 'batch', 1: 'sequence'}\n", "Found output output_1 with shape: {0: 'batch', 1: 'sequence'}\n", "Ensuring inputs are in correct order\n", "position_ids is not present in the generated input list.\n", "Generated inputs order: ['input_ids', 'attention_mask', 'token_type_ids']\n", "\n", "====== Optimizing ONNX model ======\n", "2021-10-23 16:56:33.317869392 [W:onnxruntime:, inference_session.cc:1419 Initialize] Serializing optimized model with Graph Optimization level greater than ORT_ENABLE_EXTENDED and the NchwcTransformer enabled. The generated model may contain hardware specific optimizations, and should only be used in the same environment the model was optimized in.\n", "Optimized model has been written at /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1.onnx: ✔\n", "/!\\ Optimized model contains hardware specific operators which might not be portable. 
/!\\\n", "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n", "This limitation will be removed in the next release of onnxruntime.\n", "WARNING:root:onnxruntime.quantization.quantize is deprecated.\n", " Please use quantize_static for static quantization, quantize_dynamic for dynamic quantization.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. 
No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. 
No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator FusedMatMul. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Warning: Unsupported operator Gelu. No schema registered for this operator.\n", "Warning: Unsupported operator LayerNormalization. No schema registered for this operator.\n", "Quantized model has been written at /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1-quantized.onnx: ✔\n", "\n", "====== Check exported ONNX model(s) ======\n", "Checking ONNX model loading from: /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1.1-portuguese.onnx ...\n", "Model /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1.1-portuguese.onnx correctly loaded: ✔\n", "Checking ONNX model loading from: /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1.onnx ...\n", "Model /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1.onnx correctly loaded: ✔\n", "Checking ONNX model loading from: /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1-quantized.onnx ...\n", "Model /content/onnx_cpu/pierreguillou-bert-base-cased-squad-v1-quantized.onnx correctly loaded: ✔\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "DaUBCbBVRTLS" }, "source": [ "#### Import the onnx quantized version of the model" ] }, { "cell_type": "code", "metadata": { "id": "evHBG4EbN99U" }, "source": [ "import onnxruntime as ort\n", "\n", "# copy/paste the path to the file xxx.quantized.onnx\n", "ort_session = 
ort.InferenceSession(\"/content/\" + model_checkpoint_onnx)" ], "execution_count": 34, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "9jl869YbRNwD" }, "source": [ "#### Run the onnx model\n", "\n", "Note: the session created above is loaded from `model_checkpoint_onnx`, which is the plain export (`onnx_cpu/pierreguillou-bert-base-cased-squad-v1.1-portuguese.onnx`), not the `-quantized.onnx` file written by the conversion step. The latency measured below is therefore that of the non-quantized ONNX model." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "anXCXVTAUIgv", "outputId": "716b3014-a035-4d86-d0ec-94a345de7723" }, "source": [ "num = 100\n", "\n", "total = 0\n", "for i in range(num):\n", " start = perf_counter()\n", " outputs = ort_session.run(None, dict(inputs))\n", " diff = perf_counter() - start\n", " total += diff\n", "\n", "onnx_mean_time_cpu = round((total/num)*1000,2)\n", "print(f'average time: {onnx_mean_time_cpu} ms')" ], "execution_count": 35, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "average time: 711.5 ms\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "4I9gMNPvVKD6" }, "source": [ "Now, we can evaluate the time to get the answer." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bUNtq4WaVV5I", "outputId": "f37643a6-cda9-46bd-e7db-49c397eaec7e" }, "source": [ "num = 100\n", "\n", "total = 0\n", "for i in range(num):\n", " start = perf_counter()\n", " # code source: https://huggingface.co/transformers/master/task_summary.html#extractive-question-answering\n", "\n", " answer_start_scores = outputs[0]\n", " answer_end_scores = outputs[1]\n", "\n", " # Get the most likely beginning of answer with the argmax of the score\n", " answer_start = np.argmax(answer_start_scores)\n", " # Get the most likely end of answer with the argmax of the score\n", " answer_end = np.argmax(answer_end_scores) + 1\n", "\n", " input_ids = inputs[\"input_ids\"].tolist()[0]\n", " answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))\n", "\n", " diff = perf_counter() - start\n", " total += diff\n", " \n", " # print(f\"Question: {question}\")\n", " # print(f\"Answer: 
{answer}\")\n", "\n", "onnx_mean_time_cpu_answer = round((total/num)*1000,2)\n", "print(f'average time: {onnx_mean_time_cpu_answer} ms')" ], "execution_count": 36, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "average time: 0.07 ms\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "VQyCmfZUX5nI", "outputId": "0b62816c-007a-4742-e308-4c266eec788f" }, "source": [ "print(f\"Question: {question}\")\n", "print(f\"Answer: {answer}\")" ], "execution_count": 37, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Question: Quando começou a pandemia de Covid-19 no mundo?\n", "Answer: 1 de dezembro de 2019\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "cQEOebWYVV5J" }, "source": [ "Then, we have the total time when the model is on the CPU:" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UDRAppQ_VV5J", "outputId": "e93d474a-ac54-41b7-a471-e5b1e94adc4d" }, "source": [ "onnx_total_cpu = round(onnx_mean_tokenizer + onnx_mean_time_cpu + onnx_mean_time_cpu_answer,2)\n", "print(f'time: {onnx_total_cpu} ms')" ], "execution_count": 38, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "time: 712.28 ms\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "rDV-A-S_cxwR" }, "source": [ "### 3.1.4 Results with ONNX Runtime" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 112 }, "id": "UcXeFapAzs2I", "outputId": "54d9c54e-9a78-4d91-a0a2-053979d116bc" }, "source": [ "import pandas as pd\n", "\n", "raw_data = {\n", " 'Latency on CPU (ms)': [mean_time_cpu, onnx_mean_time_cpu],\n", " }\n", "\n", "df = pd.DataFrame(raw_data,\n", " index=pd.Index(['PyTorch (without pipeline)', 'ONNX Runtime']),\n", " columns=pd.Index(['Latency on CPU (ms)']))\n", "\n", "df" ], "execution_count": 39, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Latency on CPU (ms)
PyTorch (without pipeline)889.07
ONNX Runtime711.50
\n", "
" ], "text/plain": [ " Latency on CPU (ms)\n", "PyTorch (without pipeline) 889.07\n", "ONNX Runtime 711.50" ] }, "metadata": {}, "execution_count": 39 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 352 }, "id": "8By6zvp_cxwR", "outputId": "1134015c-17a1-458c-b27f-1716404e1bf7" }, "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "labels = ['CPU']\n", "data = [mean_time_cpu, onnx_mean_time_cpu]\n", "\n", "fig = plt.figure()\n", "ax = fig.add_axes([0,0,1,1])\n", "\n", "X = np.arange(1)\n", "ax.bar(X - 0.1, data[0], color = 'r', width = 0.2, label='PyTorch (without pipeline)')\n", "ax.bar(X + 0.1, data[1], color = 'g', width = 0.2, label='ONNX Runtime')\n", "\n", "# axes and title\n", "x = np.arange(len(labels)) # the label locations\n", "ax.set_xticks(x)\n", "ax.set_xticklabels(labels)\n", "ax.set_ylabel('Latency (ms)')\n", "ax.set_title('Inference latency (CPU) in Google Colab')\n", "\n", "leg = ax.legend();" ], "execution_count": 40, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" } } ] }, { "cell_type": "markdown", "metadata": { "id": "1WI18RL5cxwR" }, "source": [ "- ONNX Runtime helps improve latency on CPU (just a bit).\n", "- On CPU, using **ONNX Runtime** allows inferring up to **1.2 times faster** than with the PyTorch model (**632.58 ms** with ONNX Runtime)." ] }, { "cell_type": "markdown", "metadata": { "id": "qPQdX75lWG1C" }, "source": [ "### 3.2 New method | transformers.onnx" ] }, { "cell_type": "markdown", "metadata": { "id": "_lBK1y0UFjSO" }, "source": [ "(source: [Configuration-based approach](https://huggingface.co/transformers/master/serialization.html#configuration-based-approach)) Transformers v4.9.0 introduces a new package: `transformers.onnx`. **This package allows converting checkpoints to an ONNX graph by leveraging configuration objects.** These configuration objects come ready made for a number of model architectures, and are made to be easily extendable to other architectures.\n", "\n", "Ready-made configurations include the following models:\n", "\n", "````\n", "ALBERT\n", "BART\n", "BERT\n", "DistilBERT\n", "GPT Neo\n", "LayoutLM\n", "Longformer\n", "mBART\n", "OpenAI GPT-2\n", "RoBERTa\n", "T5\n", "XLM-RoBERTa\n", "````" ] }, { "cell_type": "markdown", "metadata": { "id": "LdKK7nYXWib8" }, "source": [ "Run `transformers.onnx` (or the conversion script located at [transformers/convert_graph_to_onnx.py](https://github.com/huggingface/transformers/blob/master/src/transformers/convert_graph_to_onnx.py)). This script takes a few arguments such as the model to be exported and the framework you want to export from (PyTorch or TensorFlow)." ] }, { "cell_type": "markdown", "metadata": { "id": "8taFL-OHddE_" }, "source": [ "**WARNING**: which arguments to use?" ] }, { "cell_type": "markdown", "metadata": { "id": "mKf91Ju0Wib8" }, "source": [ "It will be exported under `onnx/pierreguillou-bert-base-cased-squad-v1.1-portuguese`." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "X-sH97kXWib8", "outputId": "645bf295-26a6-4923-d111-11bf7738ccdc" }, "source": [ "%%time\n", "model_checkpoint_onnx = 'onnx/' + model_checkpoint.replace('/','-')\n", "\n", "!python -m transformers.onnx --model {model_checkpoint} {model_checkpoint_onnx}" ], "execution_count": 41, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Some weights of the model checkpoint at pierreguillou/bert-base-cased-squad-v1.1-portuguese were not used when initializing BertModel: ['qa_outputs.bias', 'qa_outputs.weight']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertModel were not initialized from the model checkpoint at pierreguillou/bert-base-cased-squad-v1.1-portuguese and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "Using framework PyTorch: 1.9.0+cu111\n", "Overriding 1 configuration item(s)\n", "\t- use_cache -> False\n", "Validating ONNX model...\n", "\t-[✓] ONNX model outputs' name match reference model ({'pooler_output', 'last_hidden_state'}\n", "\t- Validating ONNX Model output \"last_hidden_state\":\n", "\t\t-[✓] (2, 8, 768) matches (2, 8, 768)\n", "\t\t-[✓] all values close (atol: 0.0001)\n", "\t- Validating ONNX Model output \"pooler_output\":\n", "\t\t-[✓] (2, 768) matches (2, 768)\n", "\t\t-[✓] all values close (atol: 0.0001)\n", "All good, model saved at: 
onnx/pierreguillou-bert-base-cased-squad-v1.1-portuguese/model.onnx\n", "CPU times: user 104 ms, sys: 43.8 ms, total: 148 ms\n", "Wall time: 13.3 s\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "cKopthXBSXAP" }, "source": [ "The outputs can be obtained by taking a look at the ONNX configuration of each model. For example, for BERT:" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "M-KOpz6RSIH1", "outputId": "52a26a73-2c2d-406a-ea23-0fab95205fb8" }, "source": [ "from transformers.models.bert import BertOnnxConfig, BertConfig\n", "\n", "config = BertConfig()\n", "onnx_config = BertOnnxConfig(config)\n", "output_keys = list(onnx_config.outputs.keys())\n", "output_keys" ], "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['last_hidden_state', 'pooler_output']" ] }, "metadata": {}, "execution_count": 42 } ] }, { "cell_type": "markdown", "metadata": { "id": "rQvqPtLaY3tG" }, "source": [ "**WARNING**: these outputs (`last_hidden_state` and `pooler_output`) are those of the base `BertModel`. They do not include the start/end logits of the question-answering head, so an answer cannot be extracted from them." ] }, { "cell_type": "markdown", "metadata": { "id": "SC7iQP-sdjpR" }, "source": [ "We cannot continue with this export. " ] }, { "cell_type": "code", "metadata": { "id": "0yk6QyZyKv4k" }, "source": [ "# import onnxruntime as ort\n", "# ort_session = ort.InferenceSession('onnx/pierreguillou-bert-base-cased-squad-v1.1-portuguese/model.onnx')" ], "execution_count": 43, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "5K4w5U7u0nKI" }, "source": [ "## 4. Inference time | TorchScript" ] }, { "cell_type": "markdown", "metadata": { "id": "TSA0tteRsiz1" }, "source": [ "source: https://huggingface.co/transformers/serialization.html#torchscript" ] }, { "cell_type": "markdown", "metadata": { "id": "0cmuvCVLsovC" }, "source": [ "According to Pytorch’s documentation: “TorchScript is a way to create serializable and optimizable models from PyTorch code”. 
Pytorch’s two modules JIT and TRACE allow the developer to export their model to be re-used in other programs, such as efficiency-oriented C++ programs." ] }, { "cell_type": "markdown", "metadata": { "id": "rhJM05N8svj1" }, "source": [ "Hugging Face provided an interface that allows the export of 🤗 Transformers models to TorchScript so that they can be reused in a different environment than a Pytorch-based python program. " ] }, { "cell_type": "code", "metadata": { "id": "2Wt7_TL5P9ZM" }, "source": [ "from transformers import AutoTokenizer, AutoModelForQuestionAnswering\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True, torchscript=True)\n", "model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint, torchscript=True)\n", "model.eval();" ], "execution_count": 44, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kqfAkrhgq8fH", "outputId": "1a852da1-21e0-455d-ac66-60353597c0fc" }, "source": [ "model.bert.embeddings.word_embeddings.weight.data[0][0].item()" ], "execution_count": 45, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.009311608970165253" ] }, "metadata": {}, "execution_count": 45 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "r6HUX3Ztqunz", "outputId": "21623483-034a-4aee-9f9c-6f436b90aa4d" }, "source": [ "import sys\n", "sys.getsizeof(model.bert.embeddings.word_embeddings.weight.data[0][0].item()) " ], "execution_count": 46, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "24" ] }, "metadata": {}, "execution_count": 46 } ] }, { "cell_type": "markdown", "metadata": { "id": "hOG6K7tZqK2C" }, "source": [ "### 4.1 Tokenize the inputs" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OijtuOCLqK2D", "outputId": "2ade5358-92f5-47c6-cbe0-ce58e7ebfff3" }, "source": [ "num=100\n", "\n", "total = 
0\n", "for i in range(num):\n", " start = perf_counter()\n", " inputs = tokenizer(question, context, add_special_tokens=True, return_tensors=\"pt\")\n", " diff = perf_counter() - start\n", " total += diff\n", "\n", "torchscript_mean_tokenizer = round((total/num)*1000,2)\n", "print(f'average time: {torchscript_mean_tokenizer} ms')" ], "execution_count": 47, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "average time: 0.82 ms\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "0x_8BGWzQ8Yi" }, "source": [ "### 4.2 Model on CPU" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AhQwT81X8kk1", "outputId": "97fa9a83-6933-4e7f-e53d-7fe96d932b67" }, "source": [ "# put model and inputs to cpu\n", "model = model.to('cpu')\n", "inputs = inputs.to('cpu')\n", "\n", "# trace the model once, then average `timer` over 100 calls of the traced model\n", "with torch.no_grad():\n", " # Positional order must follow BertForQuestionAnswering.forward(input_ids, attention_mask, token_type_ids);\n", " # passing token_type_ids second would feed the segment ids to the model as its attention mask.\n", " traced_model = torch.jit.trace(model, [inputs.input_ids, inputs.attention_mask, inputs.token_type_ids])\n", " # the traced module is called positionally, so the timed call uses the same order as the trace\n", " torchscript_mean_time_cpu = round(np.mean([timer(traced_model, inputs.input_ids, inputs.attention_mask, inputs.token_type_ids) for _ in range(100)]))\n", "print(f'{torchscript_mean_time_cpu}ms')" ], "execution_count": 48, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "849ms\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "WABOBcVIqyBx" }, "source": [ "### 4.3 Results with TorchScript" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "IhqwdJPGqyBy", "outputId": "5b7a8005-c2b6-42ca-d3d6-425c26e5669e" }, "source": [ "import pandas as pd\n", "\n", "raw_data = {\n", " 'Latency on CPU (ms)': [mean_time_cpu, onnx_mean_time_cpu, torchscript_mean_time_cpu],\n", " }\n", "\n", "df = pd.DataFrame(raw_data,\n", " index=pd.Index(['PyTorch (without pipeline)', 'ONNX Runtime', 'TorchScript']),\n", " columns=pd.Index(['Latency on CPU (ms)']))\n", "\n", "df" ], "execution_count": 49, "outputs": [ { 
"output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Latency on CPU (ms)
PyTorch (without pipeline)889.07
ONNX Runtime711.50
TorchScript849.00
\n", "
" ], "text/plain": [ " Latency on CPU (ms)\n", "PyTorch (without pipeline) 889.07\n", "ONNX Runtime 711.50\n", "TorchScript 849.00" ] }, "metadata": {}, "execution_count": 49 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 352 }, "id": "4z7trW6yqyBz", "outputId": "187a7972-3eeb-456c-c86c-f5e693d49baa" }, "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "labels = ['CPU']\n", "data = [mean_time_cpu, onnx_mean_time_cpu, torchscript_mean_time_cpu]\n", "\n", "fig = plt.figure()\n", "ax = fig.add_axes([0,0,1,1])\n", "\n", "X = np.arange(1)\n", "ax.bar(X - 0.2, data[0], color = 'r', width = 0.2, label='PyTorch (without pipeline)')\n", "ax.bar(X, data[1], color = 'g', width = 0.2, label='ONNX Runtime')\n", "ax.bar(X + 0.2, data[2], color = 'b', width = 0.2, label='TorchScript')\n", "\n", "# axes and title\n", "x = np.arange(len(labels)) # the label locations\n", "ax.set_xticks(x)\n", "ax.set_xticklabels(labels)\n", "ax.set_ylabel('Latency (ms)')\n", "ax.set_title('Inference latency (CPU) in Google Colab')\n", "\n", "leg = ax.legend();" ], "execution_count": 50, "outputs": [ { "output_type": "display_data", "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" } } ] }, { "cell_type": "markdown", "metadata": { "id": "8Emg1-pErnr2" }, "source": [ "- TorchScript does not help improve latency on CPU.\n", "- On CPU, it's better to use **ONNX Runtime**." ] }, { "cell_type": "markdown", "metadata": { "id": "uMgo0Poja70v" }, "source": [ "# END" ] } ] }