Last active
September 6, 2020 01:31
-
-
Save neoyipeng2018/cbe64b40ad683ff50b15fb67fa39fabd to your computer and use it in GitHub Desktop.
FineTuningWith π€ Trainer.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "FineTuningWith π€ Trainer.ipynb", | |
| "provenance": [], | |
| "collapsed_sections": [ | |
| "3slpjqSp8zsZ", | |
| "4l-hhP-GaX_j" | |
| ], | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.4" | |
| }, | |
| "widgets": { | |
| "application/vnd.jupyter.widget-state+json": { | |
| "c6a73993a49c432ba584ec488247c016": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "state": { | |
| "_view_name": "HBoxView", | |
| "_dom_classes": [], | |
| "_model_name": "HBoxModel", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "box_style": "", | |
| "layout": "IPY_MODEL_2a083ff521a749e2b684c6bd3005bc13", | |
| "_model_module": "@jupyter-widgets/controls", | |
| "children": [ | |
| "IPY_MODEL_bbcace5b2216435babe5d2e758894022", | |
| "IPY_MODEL_637a2c14acab4d84ad9fd9fcd2a110f8" | |
| ] | |
| } | |
| }, | |
| "2a083ff521a749e2b684c6bd3005bc13": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "bbcace5b2216435babe5d2e758894022": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "state": { | |
| "_view_name": "ProgressView", | |
| "style": "IPY_MODEL_1cd5520bc2164cb5aa4e9722ac664438", | |
| "_dom_classes": [], | |
| "description": "Downloading: 100%", | |
| "_model_name": "FloatProgressModel", | |
| "bar_style": "success", | |
| "max": 411, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": 411, | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "orientation": "horizontal", | |
| "min": 0, | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_224b1f677b2c496bbf3037952287207d" | |
| } | |
| }, | |
| "637a2c14acab4d84ad9fd9fcd2a110f8": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "state": { | |
| "_view_name": "HTMLView", | |
| "style": "IPY_MODEL_da0e168573b74ae79336994546cee36d", | |
| "_dom_classes": [], | |
| "description": "", | |
| "_model_name": "HTMLModel", | |
| "placeholder": "β", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": " 411/411 [00:00<00:00, 615B/s]", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_9312c749c6cd419f90f15fd8fab2bdb2" | |
| } | |
| }, | |
| "1cd5520bc2164cb5aa4e9722ac664438": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "ProgressStyleModel", | |
| "description_width": "initial", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "bar_color": null, | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "224b1f677b2c496bbf3037952287207d": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "da0e168573b74ae79336994546cee36d": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "DescriptionStyleModel", | |
| "description_width": "", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "9312c749c6cd419f90f15fd8fab2bdb2": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "f7721aa245fd40fcbaecf1fbc6855a39": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "state": { | |
| "_view_name": "HBoxView", | |
| "_dom_classes": [], | |
| "_model_name": "HBoxModel", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "box_style": "", | |
| "layout": "IPY_MODEL_4754b43cca614df7af16afb508c99b8a", | |
| "_model_module": "@jupyter-widgets/controls", | |
| "children": [ | |
| "IPY_MODEL_3d3e2d7609454dae98ec0a85f4e219ea", | |
| "IPY_MODEL_b0e7c0e8454e48698b9aff2271a59683" | |
| ] | |
| } | |
| }, | |
| "4754b43cca614df7af16afb508c99b8a": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "3d3e2d7609454dae98ec0a85f4e219ea": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "state": { | |
| "_view_name": "ProgressView", | |
| "style": "IPY_MODEL_ab1af4531e154c5ab80e940e5c8e3746", | |
| "_dom_classes": [], | |
| "description": "Downloading: 100%", | |
| "_model_name": "FloatProgressModel", | |
| "bar_style": "success", | |
| "max": 213450, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": 213450, | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "orientation": "horizontal", | |
| "min": 0, | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_70b83f9da5744ed0a91d5c9967f6cb7b" | |
| } | |
| }, | |
| "b0e7c0e8454e48698b9aff2271a59683": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "state": { | |
| "_view_name": "HTMLView", | |
| "style": "IPY_MODEL_5960e702f1724327942731b84f3ced8f", | |
| "_dom_classes": [], | |
| "description": "", | |
| "_model_name": "HTMLModel", | |
| "placeholder": "β", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": " 213k/213k [00:00<00:00, 853kB/s]", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_2cc334cd9fc04419bcaeaec4f5a0f328" | |
| } | |
| }, | |
| "ab1af4531e154c5ab80e940e5c8e3746": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "ProgressStyleModel", | |
| "description_width": "initial", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "bar_color": null, | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "70b83f9da5744ed0a91d5c9967f6cb7b": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "5960e702f1724327942731b84f3ced8f": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "DescriptionStyleModel", | |
| "description_width": "", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "2cc334cd9fc04419bcaeaec4f5a0f328": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "f3ab3e82cd7b4a85b530576a7e26cde2": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "state": { | |
| "_view_name": "HBoxView", | |
| "_dom_classes": [], | |
| "_model_name": "HBoxModel", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "box_style": "", | |
| "layout": "IPY_MODEL_a060bdc45a754f6aae21badd838475a5", | |
| "_model_module": "@jupyter-widgets/controls", | |
| "children": [ | |
| "IPY_MODEL_532d0411a8a14b27b426663a4e875bf5", | |
| "IPY_MODEL_4aa71e4105804cc889cd99a88a478b3b" | |
| ] | |
| } | |
| }, | |
| "a060bdc45a754f6aae21badd838475a5": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "532d0411a8a14b27b426663a4e875bf5": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "state": { | |
| "_view_name": "ProgressView", | |
| "style": "IPY_MODEL_3c212742117f4a27aaca29fcde46c709", | |
| "_dom_classes": [], | |
| "description": "Downloading: 100%", | |
| "_model_name": "FloatProgressModel", | |
| "bar_style": "success", | |
| "max": 263273408, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": 263273408, | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "orientation": "horizontal", | |
| "min": 0, | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_8b3db1af7ec844079405c2a80c88f813" | |
| } | |
| }, | |
| "4aa71e4105804cc889cd99a88a478b3b": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "state": { | |
| "_view_name": "HTMLView", | |
| "style": "IPY_MODEL_45b546123fec4d64b6e3c55991f68a7f", | |
| "_dom_classes": [], | |
| "description": "", | |
| "_model_name": "HTMLModel", | |
| "placeholder": "β", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": " 263M/263M [00:04<00:00, 56.0MB/s]", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_793c1c8f87ab4138a58d194172632fca" | |
| } | |
| }, | |
| "3c212742117f4a27aaca29fcde46c709": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "ProgressStyleModel", | |
| "description_width": "initial", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "bar_color": null, | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "8b3db1af7ec844079405c2a80c88f813": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "45b546123fec4d64b6e3c55991f68a7f": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "DescriptionStyleModel", | |
| "description_width": "", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "793c1c8f87ab4138a58d194172632fca": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| } | |
| } | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/neoyipeng2018/cbe64b40ad683ff50b15fb67fa39fabd/finetuningwithtrainer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "BXmJ_IPxb8cn", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "# Fine Tuning a Language Model using 🤗's Trainer\n", | |
| "In some recent NLP competitions I entered, I noticed the huge benefits of fine tuning a language model before starting to further fine tune for downstream tasks.\n", | |
| "\n", | |
| "The Transformers library has a `Trainer` module which provides an end-to-end train/evaluation loop to fine-tune a transformer model. There are some notebooks/guides, but I was looking for a simple example that covers all the basic needs, like setting up the dataset, evaluation metrics, TensorBoard, etc. I couldn't really find one, so I decided to create one that contains everything I needed to start.\n", | |
| "\n", | |
| "**References**:\n", | |
| "1. https://zablo.net/blog/post/training-roberta-from-scratch-the-missing-guide-polish-language-model/\n", | |
| "1. https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb#scrollTo=GlvP_A-THEEl\n", | |
| "1. https://skimai.com/roberta-language-model-for-spanish/" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "-skY1JbAbEQ6", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "## Installing dependencies" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "3pPYShGjTAxW", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "!pip install transformers\n", | |
| "!pip install tokenizers\n", | |
| "!pip install tensorboard" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "bZO302pF7mig", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "## Ingredients for Language Modelling\n", | |
| "1. Model\n", | |
| "1. Dataset\n", | |
| "1. Trainer" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "3slpjqSp8zsZ", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "### 1. Initialise Model and Tokenizer\n", | |
| "This is pretty straightforward thanks to 🤗. You essentially just need to choose a model name from https://huggingface.co/models" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "OD71y3mYZ-tP", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 182, | |
| "referenced_widgets": [ | |
| "c6a73993a49c432ba584ec488247c016", | |
| "2a083ff521a749e2b684c6bd3005bc13", | |
| "bbcace5b2216435babe5d2e758894022", | |
| "637a2c14acab4d84ad9fd9fcd2a110f8", | |
| "1cd5520bc2164cb5aa4e9722ac664438", | |
| "224b1f677b2c496bbf3037952287207d", | |
| "da0e168573b74ae79336994546cee36d", | |
| "9312c749c6cd419f90f15fd8fab2bdb2", | |
| "f7721aa245fd40fcbaecf1fbc6855a39", | |
| "4754b43cca614df7af16afb508c99b8a", | |
| "3d3e2d7609454dae98ec0a85f4e219ea", | |
| "b0e7c0e8454e48698b9aff2271a59683", | |
| "ab1af4531e154c5ab80e940e5c8e3746", | |
| "70b83f9da5744ed0a91d5c9967f6cb7b", | |
| "5960e702f1724327942731b84f3ced8f", | |
| "2cc334cd9fc04419bcaeaec4f5a0f328", | |
| "f3ab3e82cd7b4a85b530576a7e26cde2", | |
| "a060bdc45a754f6aae21badd838475a5", | |
| "532d0411a8a14b27b426663a4e875bf5", | |
| "4aa71e4105804cc889cd99a88a478b3b", | |
| "3c212742117f4a27aaca29fcde46c709", | |
| "8b3db1af7ec844079405c2a80c88f813", | |
| "45b546123fec4d64b6e3c55991f68a7f", | |
| "793c1c8f87ab4138a58d194172632fca" | |
| ] | |
| }, | |
| "outputId": "b04d0c84-7a05-4ad5-ddc3-df92efe17a00" | |
| }, | |
| "source": [ | |
| "from transformers import AutoConfig,AutoTokenizer,AutoModelForPreTraining\n", | |
| "\n", | |
| "modelnm='distilbert-base-cased'\n", | |
| "tokenizer=AutoTokenizer.from_pretrained(modelnm)\n", | |
| "model=AutoModelForPreTraining.from_pretrained(modelnm)\n", | |
| "\n", | |
| "#test tokenizer\n", | |
| "tokenizer.tokenize('extravagant')" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "c6a73993a49c432ba584ec488247c016", | |
| "version_minor": 0, | |
| "version_major": 2 | |
| }, | |
| "text/plain": [ | |
| "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=411.0, style=ProgressStyle(description_β¦" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| } | |
| }, | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ], | |
| "name": "stdout" | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "f7721aa245fd40fcbaecf1fbc6855a39", | |
| "version_minor": 0, | |
| "version_major": 2 | |
| }, | |
| "text/plain": [ | |
| "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descriptiβ¦" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| } | |
| }, | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ], | |
| "name": "stdout" | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "f3ab3e82cd7b4a85b530576a7e26cde2", | |
| "version_minor": 0, | |
| "version_major": 2 | |
| }, | |
| "text/plain": [ | |
| "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=263273408.0, style=ProgressStyle(descriβ¦" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| } | |
| }, | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ], | |
| "name": "stdout" | |
| }, | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "['extra', '##va', '##gant']" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 2 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "VOBqfWOn1zBQ", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "### 2. Creating train and evaluation dataset\n", | |
| "Quite often, my datasets are in a .txt or .csv. There are 2 ways to create a dataset in huggingface from text files:\n", | |
| "\n", | |
| "1. **LineByLineTextDataset**: Assumes each line is a document; the tokenizer runs once on each line, so documents that are longer than the block size will be truncated.\n", | |
| "2. **TextDataset**: Assumes the documents are one big corpus, and splits using block size. There won't be any padding though.\n", | |
| "\n", | |
| "For this use case, I'll just grab an Amazon review dataset from https://nijianmo.github.io/amazon/index.html" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "-CfqLPJVYEKS", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 208 | |
| }, | |
| "outputId": "5e2acf1e-2118-415e-9f10-dca1f04d5601" | |
| }, | |
| "source": [ | |
| "!wget http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "--2020-09-03 14:41:51-- http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz\n", | |
| "Resolving deepyeti.ucsd.edu (deepyeti.ucsd.edu)... 169.228.63.50\n", | |
| "Connecting to deepyeti.ucsd.edu (deepyeti.ucsd.edu)|169.228.63.50|:80... connected.\n", | |
| "HTTP request sent, awaiting response... 200 OK\n", | |
| "Length: 287013 (280K) [application/octet-stream]\n", | |
| "Saving to: ‘AMAZON_FASHION_5.json.gz’\n", | |
| "\n", | |
| "AMAZON_FASHION_5.js 100%[===================>] 280.29K --.-KB/s in 0.1s \n", | |
| "\n", | |
| "2020-09-03 14:41:51 (2.22 MB/s) - ‘AMAZON_FASHION_5.json.gz’ saved [287013/287013]\n", | |
| "\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "kyFpWUyvXMwq", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| }, | |
| "outputId": "cca1794f-bbe0-4b13-f7eb-18a9a2e7bdf7" | |
| }, | |
| "source": [ | |
| "import gzip, json, pandas as pd\n", | |
| "\n", | |
| "# The archive is read line by line; every line is parsed as one JSON record.\n", | |
| "with gzip.open('AMAZON_FASHION_5.json.gz') as f:\n", | |
| "    data = [json.loads(line.strip()) for line in f]\n", | |
| "df = pd.DataFrame.from_dict(data)\n", | |
| "\n", | |
| "print(len(df))" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "3176\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "F_ZejltASXU-", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "# The dataset classes below tokenize these files as plain text, so write nothing but the\n", | |
| "# review text: no index column, no 'reviewText' header row, and no empty rows for\n", | |
| "# reviews whose text is missing.\n", | |
| "# First 3000 rows are used for training, the remainder for validation.\n", | |
| "df[['reviewText']][:3000].dropna().to_csv('train.csv', index=False, header=False)\n", | |
| "df[['reviewText']][3000:].dropna().to_csv('val.csv', index=False, header=False)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "C3QVwSGWUc4z", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "from transformers import LineByLineTextDataset,DataCollatorForLanguageModeling,TextDataset" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "087O5Kb6M7Bj", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "# Collator for masked language modelling: mlm=True with a 15% masking probability.\n", | |
| "data_collator = DataCollatorForLanguageModeling(\n", | |
| "    tokenizer=tokenizer,\n", | |
| "    mlm=True,\n", | |
| "    mlm_probability=0.15,\n", | |
| ")\n", | |
| "\n", | |
| "# TextDataset: each file is handled as one corpus, split with block_size=128.\n", | |
| "train_dataset, val_dataset = (\n", | |
| "    TextDataset(tokenizer=tokenizer, file_path=csv_path, block_size=128)\n", | |
| "    for csv_path in ('train.csv', 'val.csv')\n", | |
| ")\n", | |
| "\n", | |
| "# Alternative: LineByLineTextDataset (one document per line), kept for reference.\n", | |
| "# train_dataset = LineByLineTextDataset(tokenizer=tokenizer, file_path='train.csv', block_size=512)\n", | |
| "# val_dataset = LineByLineTextDataset(tokenizer=tokenizer, file_path='val.csv', block_size=512)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "Z7U1QxGZ77Zk", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "### 3. Trainer Arguments and Trainer\n", | |
| "We initialise the training arguments and also Trainer" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "_A8OQiZ1UBrG", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 364 | |
| }, | |
| "outputId": "d583e58a-6bd5-4307-a4bf-ef20f01f15c4" | |
| }, | |
| "source": [ | |
| "# Check that we have a GPU\n", | |
| "!nvidia-smi" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "Thu Sep 3 14:43:04 2020 \n", | |
| "+-----------------------------------------------------------------------------+\n", | |
| "| NVIDIA-SMI 450.66 Driver Version: 418.67 CUDA Version: 10.1 |\n", | |
| "|-------------------------------+----------------------+----------------------+\n", | |
| "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
| "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", | |
| "| | | MIG M. |\n", | |
| "|===============================+======================+======================|\n", | |
| "| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n", | |
| "| N/A 42C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |\n", | |
| "| | | ERR! |\n", | |
| "+-------------------------------+----------------------+----------------------+\n", | |
| " \n", | |
| "+-----------------------------------------------------------------------------+\n", | |
| "| Processes: |\n", | |
| "| GPU GI CI PID Type Process name GPU Memory |\n", | |
| "| ID ID Usage |\n", | |
| "|=============================================================================|\n", | |
| "| No running processes found |\n", | |
| "+-----------------------------------------------------------------------------+\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "z424e05WUFrz", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| }, | |
| "outputId": "d913f282-b185-4f87-eeb8-29534e537988" | |
| }, | |
| "source": [ | |
| "import torch\n", | |
| "torch.cuda.is_available()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 9 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "Gm-zq6m5T8zC", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "### Using Trainer\n", | |
| "- For details on training arguments: https://huggingface.co/transformers/main_classes/trainer.html\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "SvTNFlOVi5xA", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "To get the Trainer to compute accuracy for our Masked Language Modelling, we create these 2 functions" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "e57i8G6AEXvb", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "def accuracy(input, targs, ignore_index=-100):\n", | |
| "    \"\"\"Fraction of scored positions where `input` and `targs` hold the same class id.\n", | |
| "\n", | |
| "    Both arguments are arrays of class ids (not logits) with the same number of\n", | |
| "    elements. A position is left out of the average when either array holds\n", | |
| "    `ignore_index` there: masked-language-modelling labels use -100 for tokens that\n", | |
| "    were not masked, and counting those would report every such position as a miss.\n", | |
| "    Pass `ignore_index=None` to score every position.\n", | |
| "\n", | |
| "    Returns a Python float; 0.0 when no position is left to score.\n", | |
| "    \"\"\"\n", | |
| "    import numpy as np  # local import keeps this cell runnable on its own\n", | |
| "\n", | |
| "    input = np.asarray(input).reshape(-1)\n", | |
| "    targs = np.asarray(targs).reshape(-1)\n", | |
| "    matches = input == targs\n", | |
| "    if ignore_index is not None:\n", | |
| "        # Symmetric mask: works whichever argument carries the labels.\n", | |
| "        scored = (input != ignore_index) & (targs != ignore_index)\n", | |
| "        matches = matches[scored]\n", | |
| "    return float(matches.mean()) if matches.size else 0.0" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "GexxpXt3EX7e", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "def compute_metrics(pred):\n", | |
| "    \"\"\"Masked-token accuracy for the Trainer's evaluation loop.\n", | |
| "\n", | |
| "    `pred.predictions` is reduced to class ids with an argmax over the last axis and\n", | |
| "    compared with `pred.label_ids`. Label positions equal to -100 carry no target\n", | |
| "    (the MLM collator marks tokens it did not mask that way), so only the remaining\n", | |
| "    positions are scored.\n", | |
| "\n", | |
| "    Returns {'accuracy': float}; the accuracy is 0.0 when no position carries a label.\n", | |
| "    \"\"\"\n", | |
| "    labels = pred.label_ids\n", | |
| "    preds = pred.predictions.argmax(-1)\n", | |
| "    scored = labels != -100\n", | |
| "    if not scored.any():\n", | |
| "        return {'accuracy': 0.0}\n", | |
| "    # Predictions first, targets second, matching accuracy(input, targs).\n", | |
| "    return {'accuracy': float(accuracy(preds[scored], labels[scored]))}" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "F33spPoZjCfn", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "Start tensorboard writer" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "HE4qoiWpOxMK", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "from torch.utils.tensorboard import SummaryWriter\n", | |
| "tb = SummaryWriter()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "wjeKhkrGjJhm", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "Define training arguments and trainer" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "BPgFNONpEYAW", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "from transformers import Trainer, TrainingArguments\n", | |
| "\n", | |
| "training_args = TrainingArguments(\n", | |
| "    # Output directory and checkpointing\n", | |
| "    output_dir=\"./model\",\n", | |
| "    overwrite_output_dir=True,\n", | |
| "    save_steps=1000,\n", | |
| "    save_total_limit=1,\n", | |
| "    # Optimisation\n", | |
| "    do_train=True,\n", | |
| "    num_train_epochs=3,\n", | |
| "    learning_rate=5e-5,  # BERT fine-tuning learning rates were in the range 2e-5 to 5e-5\n", | |
| "    # warmup_steps=1000,  # BERT's pre-training phase had 10k warmup steps\n", | |
| "    per_device_train_batch_size=8,\n", | |
| "    per_device_eval_batch_size=8,\n", | |
| "    # Gradient accumulation reduces memory usage while allowing a bigger overall\n", | |
| "    # batch size. RoBERTa used this technique to get an 8k batch size.\n", | |
| "    gradient_accumulation_steps=8,\n", | |
| "    # Logging and evaluation cadence\n", | |
| "    evaluate_during_training=True,\n", | |
| "    logging_steps=1,\n", | |
| "    eval_steps=1,\n", | |
| ")\n", | |
| "\n", | |
| "trainer = Trainer(\n", | |
| "    model=model,\n", | |
| "    args=training_args,\n", | |
| "    train_dataset=train_dataset,\n", | |
| "    eval_dataset=val_dataset,\n", | |
| "    data_collator=data_collator,\n", | |
| "    compute_metrics=compute_metrics,\n", | |
| "    tb_writer=tb,\n", | |
| ")" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "2R5zX616jN2r", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "Train" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "JgBw1Ap8EYFF", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "%%time\n", | |
| "trainer.train()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "SFy2fG0cJ51H", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 766 | |
| }, | |
| "outputId": "dc59a426-2c89-42c5-aa87-e879f34fa541" | |
| }, | |
| "source": [ | |
| "!tensorboard dev upload --logdir runs" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "2020-09-03 14:45:50.966465: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n", | |
| "\n", | |
| "***** TensorBoard Uploader *****\n", | |
| "\n", | |
| "This will upload your TensorBoard logs to https://tensorboard.dev/ from\n", | |
| "the following directory:\n", | |
| "\n", | |
| "runs\n", | |
| "\n", | |
| "This TensorBoard will be visible to everyone. Do not upload sensitive\n", | |
| "data.\n", | |
| "\n", | |
| "Your use of this service is subject to Google's Terms of Service\n", | |
| "<https://policies.google.com/terms> and Privacy Policy\n", | |
| "<https://policies.google.com/privacy>, and TensorBoard.dev's Terms of Service\n", | |
| "<https://tensorboard.dev/policy/terms/>.\n", | |
| "\n", | |
| "This notice will not be shown again while you are logged into the uploader.\n", | |
| "To log out, run `tensorboard dev auth revoke`.\n", | |
| "\n", | |
| "Continue? (yes/NO) yes\n", | |
| "\n", | |
| "Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=373649185512-8v619h5kft38l4456nm2dj4ubeqsrvh6.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email&state=DySudJM7iN3tEjewfPCdKAL8frAksT&prompt=consent&access_type=offline\n", | |
| "Enter the authorization code: <redacted>\n", | |
| "\n", | |
| "Data for the \"graphs\" plugin is now uploaded to TensorBoard.dev! Note that uploaded data is public. If you do not want to upload data for this plugin, use the \"--plugins\" command line argument.\n", | |
| "Data for the \"histograms\" plugin is now uploaded to TensorBoard.dev! Note that uploaded data is public. If you do not want to upload data for this plugin, use the \"--plugins\" command line argument.\n", | |
| "Data for the \"hparams\" plugin is now uploaded to TensorBoard.dev! Note that uploaded data is public. If you do not want to upload data for this plugin, use the \"--plugins\" command line argument.\n", | |
| "Upload started and will continue reading any new data as it's added\n", | |
| "to the logdir. To stop uploading, press Ctrl-C.\n", | |
| "\n", | |
| "View your TensorBoard live at: https://tensorboard.dev/experiment/DIl7t4EkQym5kGXEXO4qTw/\n", | |
| "\n", | |
| "\u001b[1m[2020-09-03T14:46:10]\u001b[0m Uploader started.\n", | |
| "\u001b[1m[2020-09-03T14:46:10]\u001b[0m Total uploaded: 216 scalars, 3 tensors (18 B), 0 binary objects\n", | |
| "\n", | |
| "Interrupted. View your TensorBoard at https://tensorboard.dev/experiment/DIl7t4EkQym5kGXEXO4qTw/\n", | |
| "Exception ignored in: <bound method Channel.__del__ of <grpc._channel.Channel object at 0x7fc9ed7cd3c8>>\n", | |
| "Traceback (most recent call last):\n", | |
| " File \"/usr/local/lib/python3.6/dist-packages/grpc/_channel.py\", line 1446, in __del__\n", | |
| " def __del__(self):\n", | |
| "KeyboardInterrupt\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "_8EMioIuiECl", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "Save" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "2y4E9Y36jQyT", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "trainer.save_model(\"./model\")" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "4l-hhP-GaX_j", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "# Conclusion\n", | |
| "We've initialised a transformer model, loaded a dataset, fine-tuned a pre-trained language model for a few epochs, and visualised the validation loss and accuracy in TensorBoard." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "CpIunQmZYcg_", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "Next: Explore hyperparameter search\n", | |
| "1. https://huggingface.co/transformers/master/main_classes/trainer.html#transformers.Trainer.hyperparameter_search\n", | |
| "1. https://discuss.huggingface.co/t/using-hyperparameter-search-in-trainer/785/10" | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment