{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "25418b744a94403b87bdb1ee441b0f0f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_97604edfba2b406ba24c09444dd83669", "IPY_MODEL_bd19a87519164940b31566aa9dc83783", "IPY_MODEL_142b45c1a5b6428a8238c9f5a3b67af4" ], "layout": "IPY_MODEL_79d4c52b439b462e817c804c9ae5868a" } }, "97604edfba2b406ba24c09444dd83669": { "model_module": "@jupyter-widgets/controls", "": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8f2efd39a46c4652baf9c150961747e3", "placeholder": "​", "style": "IPY_MODEL_b5ab71c0700440abb2241e21c1ff2700", "value": "yolox_l0.05.onnx: 100%" } }, "bd19a87519164940b31566aa9dc83783": { "model_module": "@jupyter-widgets/controls", "": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": 
"IPY_MODEL_2dc3b854904340da829f069019913c38", "max": 216625723, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_990c72a5cb1643da918286ec22a0549e", "value": 216625723 } }, "142b45c1a5b6428a8238c9f5a3b67af4": { "model_module": "@jupyter-widgets/controls", "": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9eca96f57de49eab0340ca23b760b27", "placeholder": "​", "style": "IPY_MODEL_32231abc0d4241dd94b1219c27e61158", "value": " 217M/217M [00:01<00:00, 152MB/s]" } }, "79d4c52b439b462e817c804c9ae5868a": { "model_module": "@jupyter-widgets/base", "": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8f2efd39a46c4652baf9c150961747e3": { "model_module": "@jupyter-widgets/base", 
"model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b5ab71c0700440abb2241e21c1ff2700": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2dc3b854904340da829f069019913c38": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, 
"grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "990c72a5cb1643da918286ec22a0549e": { "model_module": "@jupyter-widgets/controls", "": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a9eca96f57de49eab0340ca23b760b27": { "model_module": "@jupyter-widgets/base", "": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": 
null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "32231abc0d4241dd94b1219c27e61158": { "model_module": "@jupyter-widgets/controls", "": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "20a9106670844c18b04aff1b71795212": { "model_module": "@jupyter-widgets/controls", "": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_670870037261419b93dd085c34eac92b", "IPY_MODEL_16858dfe37f14dafa82fc1eb35f41ccd", "IPY_MODEL_95d5d43f493d42149937016163ce4413" ], "layout": "IPY_MODEL_af27da8d0cf3455b9b316abbe0657ff9" } }, "670870037261419b93dd085c34eac92b": { "model_module": "@jupyter-widgets/controls", "": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a180017cc04445409c8f73b7bca5d7f3", "placeholder": "​", "style": "IPY_MODEL_9f7fe02ed6dc46feae3d0f5fb1c4989d", "value": "config.json: 100%" } }, "16858dfe37f14dafa82fc1eb35f41ccd": { "model_module": "@jupyter-widgets/controls", "": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_98287e6ed278458eb18ed17a2fea408f", "max": 1469, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_2b77403b6c594536b50daa5ca7eb86e8", "value": 1469 } }, "95d5d43f493d42149937016163ce4413": { "model_module": "@jupyter-widgets/controls", "": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_217c6ca8a3134685904dd86fa59b20d9", "placeholder": "​", "style": "IPY_MODEL_fdf7d1d48cae4ab188dbc1535c857d8e", "value": " 1.47k/1.47k [00:00<00:00, 82.0kB/s]" } }, "af27da8d0cf3455b9b316abbe0657ff9": { "model_module": "@jupyter-widgets/base", "": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, 
"min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a180017cc04445409c8f73b7bca5d7f3": { "model_module": "@jupyter-widgets/base", "": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9f7fe02ed6dc46feae3d0f5fb1c4989d": { "model_module": "@jupyter-widgets/controls", "": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "98287e6ed278458eb18ed17a2fea408f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", 
"_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2b77403b6c594536b50daa5ca7eb86e8": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "217c6ca8a3134685904dd86fa59b20d9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": 
null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fdf7d1d48cae4ab188dbc1535c857d8e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d3f7dd522e6d40bf94e46a44e70ef614": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_13aa62f7ac34402ba6618bd63d6e2dd8", "IPY_MODEL_c81f1b0435d2462f9c9e672c11b01f5e", "IPY_MODEL_5feec57ac1b248d3bdd627ff139d8d52" ], "layout": "IPY_MODEL_391ed696c4824dc3ae25cb0add233071" } }, "13aa62f7ac34402ba6618bd63d6e2dd8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_397bc80cd9844c5699abd62234539b5e", "placeholder": "​", "style": "IPY_MODEL_9b668233eb794d63ab9d168bd9e214b8", "value": "model.safetensors: 100%" } }, "c81f1b0435d2462f9c9e672c11b01f5e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c69ca675c5884f8593b45341fa898992", "max": 115434268, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7813ca840b004910a36a5fd5ff3bbd78", "value": 115434268 } }, "5feec57ac1b248d3bdd627ff139d8d52": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4ec1df63dd5743099a8dca4a8d677701", "placeholder": "​", "style": "IPY_MODEL_e75cfc4ba9ef49f081bda4bbea77f54a", "value": " 115M/115M [00:00<00:00, 196MB/s]" } }, "391ed696c4824dc3ae25cb0add233071": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, 
"border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "397bc80cd9844c5699abd62234539b5e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9b668233eb794d63ab9d168bd9e214b8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": 
{ "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c69ca675c5884f8593b45341fa898992": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7813ca840b004910a36a5fd5ff3bbd78": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4ec1df63dd5743099a8dca4a8d677701": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": 
"1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e75cfc4ba9ef49f081bda4bbea77f54a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d2a1416fa00e4e1bae2e9554f27a4ffc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9fee1d2a9ba04a9dac8094e3d73ba233", "IPY_MODEL_d5d2aac91f4f4d3598144d3ef0d9a5c1", 
"IPY_MODEL_cbe1eaebf48e44b8b12cc015351e4fa2" ], "layout": "IPY_MODEL_da5d88103dd7476588e8840e17b2c60c" } }, "9fee1d2a9ba04a9dac8094e3d73ba233": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_47079e4440fb495bbc444fd84a38bd26", "placeholder": "​", "style": "IPY_MODEL_025ff4bbf0fd4fe98318e2b6572fa92e", "value": "model.safetensors: 100%" } }, "d5d2aac91f4f4d3598144d3ef0d9a5c1": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f603dd0fa759420bb761014a8c16ffeb", "max": 46807446, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_88cde8b2165e4c6da739ad395c0fdbb2", "value": 46807446 } }, "cbe1eaebf48e44b8b12cc015351e4fa2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fbf0a24f3e7a4041808ee387e5bbb0c0", "placeholder": "​", "style": 
"IPY_MODEL_2e573c7670a34f638fc3b149bf761fa3", "value": " 46.8M/46.8M [00:00<00:00, 280MB/s]" } }, "da5d88103dd7476588e8840e17b2c60c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "47079e4440fb495bbc444fd84a38bd26": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": 
null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "025ff4bbf0fd4fe98318e2b6572fa92e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f603dd0fa759420bb761014a8c16ffeb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "88cde8b2165e4c6da739ad395c0fdbb2": { 
"model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "fbf0a24f3e7a4041808ee387e5bbb0c0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2e573c7670a34f638fc3b149bf761fa3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, 
"cells": [ { "cell_type": "markdown", "source": [ "# Unstructured - PDF to JSON and HTML" ], "metadata": { "id": "bXfuGoJFxrFx" } }, { "cell_type": "markdown", "source": [ "- Author: [Pierre GUILLOU](https://www.linkedin.com/in/pierreguillou/)\n", "- Date: 18/12/2023 (previous version: 15/12/2023)\n", "- Blog post: [unstructured library | Get the JSON and HTML versions of any PDF (legal, financial, medical…), even PDF with tables!](https://medium.com/@pierre_guillou/unstructured-library-get-the-json-and-html-versions-of-any-pdf-legal-financial-medical-fb7738769caa)" ], "metadata": { "id": "JIGCcb8nW_W_" } }, { "cell_type": "markdown", "source": [ "## Setup" ], "metadata": { "id": "Sp026Z98x1Lk" } }, { "cell_type": "code", "source": [ "%%capture\n", "!apt-get install poppler-utils\n", "!apt-get install tesseract-ocr-all\n", "# unstructured 0.11.5\n", "# unstructured-inference 0.7.19\n", "!pip install unstructured[all-docs] unstructured-inference" ], "metadata": { "id": "xIfLbhqF70eP" }, "execution_count": 1, "outputs": [] }, { "cell_type": "markdown", "source": [ "**Restart session in Colab!**" ], "metadata": { "id": "7xJKVdcZxvgV" } }, { "cell_type": "markdown", "source": [ "## Import libraries" ], "metadata": { "id": "BJIOzVJPKa3S" } }, { "cell_type": "markdown", "source": [ "**You must have restarted your session before running the next cell.**" ], "metadata": { "id": "L1AqgrNnKe6D" } }, { "cell_type": "code", "source": [ "import pathlib\n", "from pathlib import Path" ], "metadata": { "id": "hguT7OF9l5mp" }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [ "# select the partition function\n", "from unstructured.partition.pdf import partition_pdf # version unstructured 0.11.5" ], "metadata": { "id": "fPbNYGbbeTMK" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "# Define parameters for Unstructured's library\n", "\n", "## include_page_breaks\n", "# include page breaks (default is False)\n", "include_page_breaks 
= True\n",
"\n",
"## strategy\n",
"# The strategy to use for partitioning the PDF. Valid strategies are \"hi_res\", \"ocr_only\", and \"fast\".\n",
"# When using the \"hi_res\" strategy, the function uses a layout detection model to identify document elements.\n",
"# \"hi_res\" is used for analyzing PDFs and extracting table structure (default is \"auto\")\n",
"strategy = \"hi_res\"\n",
"\n",
"## infer_table_structure\n",
"# Only applicable if `strategy=hi_res`.\n",
"# If True, any Table elements that are extracted will also have a metadata field named \"text_as_html\" where the table's text content is rendered into an html string.\n",
"# I.e., rows and cells are preserved.\n",
"# Whether True or False, the \"text\" field is always present in any Table element and is the text content of the table (no structure).\n",
"\n",
"# Derived directly from `strategy`: True only for \"hi_res\".\n",
"infer_table_structure = strategy == \"hi_res\"\n",
"\n",
"## extract_element_types\n",
"# Get images of tables (only requested when table structure is inferred)\n",
"extract_element_types = [\"Table\"] if infer_table_structure else None\n",
"\n",
"## max_characters\n",
"# The maximum number of characters to include in a partition (document element)\n",
"# If None is passed, no maximum is applied.\n",
"# Only applies to the \"ocr_only\" strategy (default is 1500)\n",
"# Always assigned, so the partition_pdf call below never receives an undefined name:\n",
"# the default quoted above is used for \"ocr_only\", and None for every other strategy.\n",
"max_characters = 1500 if strategy == \"ocr_only\" else None\n",
"\n",
"## languages\n",
"# The languages to use for the Tesseract agent.\n",
"# To use a language, you'll first need to install the appropriate Tesseract language pack.\n",
"languages = [\"eng\"] # example if more than one \"eng+por\" (default is \"eng\")\n",
"\n",
"## model_name\n",
"# @requires_dependencies(\"unstructured_inference\")\n",
"# yolox: best model for table extraction. Other options are yolox_quantized, detectron2_onnx and chipper depending on file layout\n",
"# source: https://unstructured-io.github.io/unstructured/best_practices/models.html\n",
"hi_res_model_name = \"yolox\"" ], "metadata": { "id": "J5sJ3QvilljH" }, "execution_count": 3, "outputs": [] },
{ "cell_type": "markdown", "source": [ "## PDF file" ], "metadata": { "id": "Z8_8oeTr-VFi" } },
{ "cell_type": "code", "source": [
"path = \"/content/\"\n",
"filename = path + \"Quarterly.Financial.Report.Template.pdf\"" ], "metadata": { "id": "5QLNHN7K-Wyf" }, "execution_count": 4, "outputs": [] },
{ "cell_type": "markdown", "source": [ "## Get partition in json file" ], "metadata": { "id": "Wcjm6AtI-XxS" } },
{ "cell_type": "code", "source": [
"# Returns a List[Element] present in the pages of the parsed pdf document\n",
"elements = partition_pdf(\n",
" filename=filename,\n",
" include_page_breaks=include_page_breaks,\n",
" strategy=strategy,\n",
" infer_table_structure=infer_table_structure,\n",
" extract_element_types=extract_element_types,\n",
" max_characters=max_characters,\n",
" languages=languages,\n",
" hi_res_model_name=hi_res_model_name,\n",
" )\n",
"\n",
"# get output as json\n",
"from unstructured.staging.base import elements_to_json\n",
"elements_to_json(elements, filename=f\"{filename}.json\") # Takes a while for file to show up on the Google Colab" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 308, "referenced_widgets": [
"25418b744a94403b87bdb1ee441b0f0f", "97604edfba2b406ba24c09444dd83669", "bd19a87519164940b31566aa9dc83783", "142b45c1a5b6428a8238c9f5a3b67af4", "79d4c52b439b462e817c804c9ae5868a", "8f2efd39a46c4652baf9c150961747e3", "b5ab71c0700440abb2241e21c1ff2700", "2dc3b854904340da829f069019913c38", "990c72a5cb1643da918286ec22a0549e", "a9eca96f57de49eab0340ca23b760b27", "32231abc0d4241dd94b1219c27e61158", "20a9106670844c18b04aff1b71795212", "670870037261419b93dd085c34eac92b", "16858dfe37f14dafa82fc1eb35f41ccd",
"95d5d43f493d42149937016163ce4413", "af27da8d0cf3455b9b316abbe0657ff9", "a180017cc04445409c8f73b7bca5d7f3", "9f7fe02ed6dc46feae3d0f5fb1c4989d", "98287e6ed278458eb18ed17a2fea408f", "2b77403b6c594536b50daa5ca7eb86e8", "217c6ca8a3134685904dd86fa59b20d9", "fdf7d1d48cae4ab188dbc1535c857d8e", "d3f7dd522e6d40bf94e46a44e70ef614", "13aa62f7ac34402ba6618bd63d6e2dd8", "c81f1b0435d2462f9c9e672c11b01f5e", "5feec57ac1b248d3bdd627ff139d8d52", "391ed696c4824dc3ae25cb0add233071", "397bc80cd9844c5699abd62234539b5e", "9b668233eb794d63ab9d168bd9e214b8", "c69ca675c5884f8593b45341fa898992", "7813ca840b004910a36a5fd5ff3bbd78", "4ec1df63dd5743099a8dca4a8d677701", "e75cfc4ba9ef49f081bda4bbea77f54a", "d2a1416fa00e4e1bae2e9554f27a4ffc", "9fee1d2a9ba04a9dac8094e3d73ba233", "d5d2aac91f4f4d3598144d3ef0d9a5c1", "cbe1eaebf48e44b8b12cc015351e4fa2", "da5d88103dd7476588e8840e17b2c60c", "47079e4440fb495bbc444fd84a38bd26", "025ff4bbf0fd4fe98318e2b6572fa92e", "f603dd0fa759420bb761014a8c16ffeb", "88cde8b2165e4c6da739ad395c0fdbb2", "fbf0a24f3e7a4041808ee387e5bbb0c0", "2e573c7670a34f638fc3b149bf761fa3" ] }, "id": "SGNzEv6VQi-v", "outputId": "ede349a8-43e0-496e-fcce-7fea14aff12d" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "[nltk_data] Downloading package punkt to /root/nltk_data...\n", "[nltk_data] Unzipping tokenizers/punkt.zip.\n", "[nltk_data] Downloading package averaged_perceptron_tagger to\n", "[nltk_data] /root/nltk_data...\n", "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "yolox_l0.05.onnx: 0%| | 0.00/217M [00:00\" + entry[\"text\"] + \"\"\n", " elif entry[\"type\"] == \"Table\":\n", " text = entry[\"metadata\"][\"text_as_html\"]\n", " else:\n", " text = \"

\" + entry[\"text\"] + \"

\"\n", "\n", " if text != text_prev: extracted_elements.append(text)\n", " text_prev = text\n", "\n", " # Write the extracted elements to the output file\n", " html_start = \"\"\"\n", " \n", " \n", " \n", " Document Information\n", " \n", " \n", " \n", " \"\"\"\n", "\n", " html_end = \"\"\"\n", " \n", " \n", " \"\"\"\n", "\n", " output_file_html = path + Path(input_filename).name.replace(\".json\", \"\") + \"_\" + model_name + \".html\"\n", " with open(output_file_html, 'w') as output_file:\n", " output_file.write(html_start + \"\\n\")\n", " for element in extracted_elements:\n", " output_file.write(element + \"\\n\")\n", " output_file.write(html_end + \"\\n\")\n", "\n", " return str(output_file_html)" ], "metadata": { "id": "-mJdBB03m3ms" }, "execution_count": 6, "outputs": [] }, { "cell_type": "code", "source": [ "import json\n", "output_file_html = process_json_file(f\"{filename}.json\") # It can take a while for the .html file to show up in Colab\n", "\n", "from google.colab import files\n", "files.download(output_file_html)" ], "metadata": { "id": "wES2N4yRnZk6", "colab": { "base_uri": "https://localhost:8080/", "height": 17 }, "outputId": "9d31cf8d-c3f7-408d-caad-447db28efe0b" }, "execution_count": 7, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "application/javascript": [ "\n", " async function download(id, filename, size) {\n", " if (!google.colab.kernel.accessAllowed) {\n", " return;\n", " }\n", " const div = document.createElement('div');\n", " const label = document.createElement('label');\n", " label.textContent = `Downloading \"${filename}\": `;\n", " div.appendChild(label);\n", " const progress = document.createElement('progress');\n", " progress.max = size;\n", " div.appendChild(progress);\n", " document.body.appendChild(div);\n", "\n", " const buffers = [];\n", " let downloaded = 0;\n", "\n", " const channel = await google.colab.kernel.comms.open(id);\n", " // Send a message to notify the kernel that we're ready.\n", 
" channel.send({})\n", "\n", " for await (const message of channel.messages) {\n", " // Send a message to notify the kernel that we're ready.\n", " channel.send({})\n", " if (message.buffers) {\n", " for (const buffer of message.buffers) {\n", " buffers.push(buffer);\n", " downloaded += buffer.byteLength;\n", " progress.value = downloaded;\n", " }\n", " }\n", " }\n", " const blob = new Blob(buffers, {type: 'application/binary'});\n", " const a = document.createElement('a');\n", " a.href = window.URL.createObjectURL(blob);\n", " a.download = filename;\n", " div.appendChild(a);\n", " a.click();\n", " div.remove();\n", " }\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "application/javascript": [ "download(\"download_6176995b-401f-4c18-b685-7831c65a16eb\", \"Quarterly.Financial.Report.Template.pdf_yolox.html\", 37559)" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "# END" ], "metadata": { "id": "71l8DjsyAkyz" } } ] }