Skip to content

Instantly share code, notes, and snippets.

@mrs83
Created October 15, 2025 15:53
Show Gist options
  • Select an option

  • Save mrs83/342d23c8bcceae22384c96d960aa62ac to your computer and use it in GitHub Desktop.
xLSTM-7b-Instruct - Text Generation Test
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "A100",
"authorship_tag": "ABX9TyNFS+rUszt4Zv+NvprtNZ9F",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"0dd10c3eecd64462acc56eae1fb82cb2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_00e3757b55ad448ea050a8d40d9b09be",
"IPY_MODEL_596645777acb4001a5da04819d7e105a",
"IPY_MODEL_b388d68e27a84c84b597bab1dc0db5b7"
],
"layout": "IPY_MODEL_bceb8ff98b6345dc830a0756e8dc37b0"
}
},
"00e3757b55ad448ea050a8d40d9b09be": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5010a6f07ce44d8ca49b4ecf41140d54",
"placeholder": "​",
"style": "IPY_MODEL_515f08efb3ea4a8cbd6e8ef8481d5acb",
"value": "Fetching 3 files: 100%"
}
},
"596645777acb4001a5da04819d7e105a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b7b530b3a2a1427b8a3759614d120772",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_4e03d98ee14a4ffcb7bbe62718677602",
"value": 3
}
},
"b388d68e27a84c84b597bab1dc0db5b7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_847ef889c21a461fb7e77d54d0edab23",
"placeholder": "​",
"style": "IPY_MODEL_905292f5101949f6a63444349ba68464",
"value": " 3/3 [00:00<00:00,  5.74it/s]"
}
},
"bceb8ff98b6345dc830a0756e8dc37b0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5010a6f07ce44d8ca49b4ecf41140d54": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"515f08efb3ea4a8cbd6e8ef8481d5acb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b7b530b3a2a1427b8a3759614d120772": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4e03d98ee14a4ffcb7bbe62718677602": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"847ef889c21a461fb7e77d54d0edab23": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"905292f5101949f6a63444349ba68464": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"65f21cb5cf6c431a9a36311edf3529c2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_260c186ea38641c89509aac0177ca4fb",
"IPY_MODEL_99e74f703946490eb93ecf30cd0c85b3",
"IPY_MODEL_20d36aa22bb04bcca1e69abc84952189"
],
"layout": "IPY_MODEL_66f050dd0cb94b62ae753ad4279b2f24"
}
},
"260c186ea38641c89509aac0177ca4fb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f0ed43c3d46a4920a65a02e3485fec39",
"placeholder": "​",
"style": "IPY_MODEL_b109e13ef458459f9313ee84d1c41cb8",
"value": "Loading checkpoint shards: 100%"
}
},
"99e74f703946490eb93ecf30cd0c85b3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0a90d915d85242f08bda898768f3c000",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_6840601971b9433cacf8acdc81f68620",
"value": 3
}
},
"20d36aa22bb04bcca1e69abc84952189": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f3b04b0c018e497a9308afff257deec1",
"placeholder": "​",
"style": "IPY_MODEL_0bab38ea74c74f8580ec5bbeeccb3df4",
"value": " 3/3 [00:04<00:00,  1.31s/it]"
}
},
"66f050dd0cb94b62ae753ad4279b2f24": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f0ed43c3d46a4920a65a02e3485fec39": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b109e13ef458459f9313ee84d1c41cb8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"0a90d915d85242f08bda898768f3c000": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6840601971b9433cacf8acdc81f68620": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"f3b04b0c018e497a9308afff257deec1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0bab38ea74c74f8580ec5bbeeccb3df4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"98332b54dec24f858dfab05f53904eec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_221bc1ead32a4d67932219d21920aa07",
"IPY_MODEL_33d1b4a6dd404a7db3610d76704e1b6a",
"IPY_MODEL_a49c500da1904adca1261ecd4ba47feb"
],
"layout": "IPY_MODEL_5f729ca3e83840e0b3b4bbe4f9d39437"
}
},
"221bc1ead32a4d67932219d21920aa07": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_de66545cc5394ccd819c42f0ab2fe03e",
"placeholder": "​",
"style": "IPY_MODEL_2fb52d0cdb8d4196a01bfe40a3543a74",
"value": "chat_template.jinja: 100%"
}
},
"33d1b4a6dd404a7db3610d76704e1b6a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8f833984323b40569ce4094a01c64d86",
"max": 3429,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_ba1757cbd6be4152af1198cdb3f4cc6e",
"value": 3429
}
},
"a49c500da1904adca1261ecd4ba47feb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_39429f94fea246f5b88daea53b2bcd32",
"placeholder": "​",
"style": "IPY_MODEL_9736cfb561024660bd4bfad6d8242b5a",
"value": " 3.43k/3.43k [00:00<00:00, 424kB/s]"
}
},
"5f729ca3e83840e0b3b4bbe4f9d39437": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"de66545cc5394ccd819c42f0ab2fe03e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2fb52d0cdb8d4196a01bfe40a3543a74": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8f833984323b40569ce4094a01c64d86": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ba1757cbd6be4152af1198cdb3f4cc6e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"39429f94fea246f5b88daea53b2bcd32": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9736cfb561024660bd4bfad6d8242b5a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/mrs83/342d23c8bcceae22384c96d960aa62ac/xlstm-7b-instruct-text-generation-test.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"!pip install \"torch==2.5.1\" \"torchvision==0.20.1\" \"torchaudio==2.5.1\" --index-url https://download.pytorch.org/whl/cu124\n",
"!pip install \"triton==3.4.0\" # >=3.1 is OK; 3.4.0 current as of Sep 2025\n",
"!pip install \"mlstm-kernels==2.0.1\" \"xlstm==2.0.5\"\n",
"import math, traceback, torch\n",
"from mlstm_kernels.torch import get_available_mlstm_kernels, get_mlstm_kernel\n",
"from transformers import xLSTMConfig, xLSTMModel\n",
"\n",
"print(\"torch\", torch.__version__)\n",
"try: import triton; print(\"triton\", triton.__version__)\n",
"except: pass\n",
"import mlstm_kernels as mk; print(\"mlstm_kernels\", mk.__version__)\n",
"import xlstm as _xl; print(\"xlstm\", _xl.__version__)\n",
"print(\"available kernels:\", get_available_mlstm_kernels())\n",
"\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"# bfloat16 preferred on recent NVIDIA; fallback to float16 otherwise.\n",
"cap = torch.cuda.get_device_capability(0) if device==\"cuda\" else (0,0)\n",
"dtype = torch.bfloat16 if device==\"cuda\" and cap >= (8,0) else torch.float16\n",
"\n",
"# ---------------- REPRO 1: HF xLSTM 'not enough values to unpack' ----------------\n",
"# Using short seq + different QK and V head dims to stress chunkwise code paths.\n",
"B, S_true, D, H = 1, 6, 128, 8 # head_dim = 16\n",
"x = torch.randn(B, S_true, D, device=device, dtype=dtype)\n",
"\n",
"cfg_bad = xLSTMConfig(\n",
" hidden_size=D, num_heads=H,\n",
" qk_dim_factor=0.5, v_dim_factor=1.0, # dqk != dv\n",
" mode=\"train\", # train path\n",
" chunk_size=3,\n",
" chunkwise_kernel=\"chunkwise--native_autograd\",\n",
" sequence_kernel=\"native_sequence__native\",\n",
" step_kernel=\"native\",\n",
" return_last_states=False, # <-- repro flag\n",
" use_cache=False,\n",
")\n",
"\n",
"print(\"\\n=== REPRO 1: HF xLSTM with return_last_states=False ===\")\n",
"m_bad = xLSTMModel(cfg_bad).to(device)\n",
"try:\n",
" _ = m_bad(inputs_embeds=x).last_hidden_state\n",
" print(\"No error (your local combo may already be fixed).\")\n",
"except Exception as e:\n",
" print(\"Expected ERROR:\", type(e).__name__, str(e).splitlines()[0])\n",
"\n",
"# ---------------- FIX 1: set return_last_states=True ----------------\n",
"print(\"=== FIX 1: HF xLSTM with return_last_states=True ===\")\n",
"cfg_fix = cfg_bad\n",
"cfg_fix.return_last_states = True # required by HF xLSTM train paths\n",
"m_fix = xLSTMModel(cfg_fix).to(device)\n",
"_ = m_fix(inputs_embeds=x).last_hidden_state\n",
"print(\"OK: forward with return_last_states=True\")\n",
"\n",
"# ---------------- REPRO 2: Triton TFLA xl_chunk with short S ----------------\n",
"print(\"\\n=== REPRO 2: Triton 'xl_chunk' with S=6 (expect constraint failure) ===\")\n",
"dqk = dhv = D // H # 16\n",
"def mk_inputs(S):\n",
" torch.manual_seed(0)\n",
" q = torch.randn(B,H,S,dqk, device=device, dtype=dtype)\n",
" k = torch.randn_like(q)\n",
" v = torch.randn(B,H,S,dhv, device=device, dtype=dtype)\n",
" i = torch.randn(B,H,S, device=device, dtype=dtype)\n",
" f = 3.0 + torch.randn(B,H,S, device=device, dtype=dtype)\n",
" return dict(q=q,k=k,v=v,i=i,f=f, return_last_states=False)\n",
"\n",
"try:\n",
" _ = get_mlstm_kernel(\"chunkwise--triton_xl_chunk\")(**mk_inputs(S_true), chunk_size=128)\n",
" print(\"Unexpected: xl_chunk accepted S=6.\")\n",
"except Exception as e:\n",
" print(\"Expected ERROR:\", type(e).__name__, str(e).splitlines()[0])\n",
"\n",
"# ---------------- FIX 2A: pad S to multiple of 16 for xl_chunk ----------------\n",
"print(\"=== FIX 2A: pad to S%16==0 for xl_chunk, then slice back ===\")\n",
"S_pad = ((S_true + 15)//16)*16\n",
"y_pad = get_mlstm_kernel(\"chunkwise--triton_xl_chunk\")(**mk_inputs(S_pad), chunk_size=128)\n",
"print(\"OK xl_chunk padded:\", tuple(y_pad.shape), \" -> keep only first\", S_true)\n",
"\n",
"# --- FIX 2B: limit_chunk with legal tile sizes ---\n",
"# Requirement: Triton tl.dot needs non-batch tile dims >= 16.\n",
"# Docs/issues: https://github.com/openai/triton/issues/3709 , https://github.com/triton-lang/triton/issues/2266\n",
"# Use chunk_size = 16 (power-of-two, >=16), and pad S up to 16\n",
"S_pad = ((S_true + 15)//16)*16 # -> 16\n",
"y_lim = get_mlstm_kernel(\"chunkwise--triton_limit_chunk\")(\n",
" **mk_inputs(S_pad), chunk_size=16\n",
")\n",
"y_lim = y_lim[:, :, :S_true, :] # drop padding\n",
"print(\"OK limit_chunk padded:\", tuple(y_lim.shape))\n",
"\n",
"# ---------------- Safe fallback you can always use during training -------------\n",
"print(\"\\n=== Safe training fallback: parallel--native_custbw (no Triton) ===\")\n",
"y_native = get_mlstm_kernel(\"parallel--native_custbw\")(**mk_inputs(S_true))\n",
"print(\"OK native parallel:\", tuple(y_native.shape))\n",
"\n",
"# Notes:\n",
"# - HF xLSTM config and 'return_last_states' flag: https://huggingface.co/docs/transformers/model_doc/xlstm\n",
"# - NX-AI xLSTM-7B config keeps 'return_last_states': true: https://huggingface.co/NX-AI/xLSTM-7b/blob/main/config.json\n",
"# - Kernel families and names (xl_chunk vs limit_chunk, parallel/native): https://github.com/nx-ai/mlstm_kernels\n",
"# - TFLA paper for kernel details and tiling assumptions: https://arxiv.org/abs/2503.14376\n",
"\n",
"\"\"\"\n",
"torch 2.5.1+cu124\n",
"triton 3.1.0\n",
"mlstm_kernels 2.0.1\n",
"xlstm 2.0.5\n",
"available kernels: ['chunkwise--native_autograd', 'chunkwise--native_custbw', 'chunkwise--triton_limit_chunk', 'chunkwise--triton_xl_chunk', 'chunkwise--triton_xl_chunk_siging', 'parallel--native_autograd', 'parallel--native_custbw', 'parallel--native_stablef_autograd', 'parallel--native_stablef_custbw', 'parallel--triton_limit_headdim', 'parallel--native_siging_autograd', 'parallel--native_siging_custbw']\n",
"\n",
"=== REPRO 1: HF xLSTM with return_last_states=False ===\n",
"Expected ERROR: ValueError not enough values to unpack (expected 2, got 1)\n",
"=== FIX 1: HF xLSTM with return_last_states=True ===\n",
"OK: forward with return_last_states=True\n",
"\n",
"=== REPRO 2: Triton 'xl_chunk' with S=6 (expect constraint failure) ===\n",
"Expected ERROR: AssertionError Sequence length must be divisible by 16.\n",
"=== FIX 2A: pad to S%16==0 for xl_chunk, then slice back ===\n",
"OK xl_chunk padded: (1, 8, 16, 16) -> keep only first 6\n",
"OK limit_chunk padded: (1, 8, 6, 16)\n",
"\n",
"=== Safe training fallback: parallel--native_custbw (no Triton) ===\n",
"OK native parallel: (1, 8, 6, 16)\n",
"\"\"\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "-ctCQ-6TRDw6",
"outputId": "e7dc9258-c5ef-4783-e546-407a49125ecf"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://download.pytorch.org/whl/cu124\n",
"Requirement already satisfied: torch==2.5.1 in /usr/local/lib/python3.12/dist-packages (2.5.1+cu124)\n",
"Requirement already satisfied: torchvision==0.20.1 in /usr/local/lib/python3.12/dist-packages (0.20.1+cu124)\n",
"Requirement already satisfied: torchaudio==2.5.1 in /usr/local/lib/python3.12/dist-packages (2.5.1+cu124)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (3.20.0)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (4.15.0)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (3.5)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (3.1.6)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (2025.3.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (9.1.0.70)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.4.5.8)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (11.2.1.3)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (10.3.5.147)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (11.6.1.9)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.3.1.170)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (2.21.5)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (12.4.127)\n",
"Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (3.1.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (75.2.0)\n",
"Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.12/dist-packages (from torch==2.5.1) (1.13.1)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from torchvision==0.20.1) (2.0.2)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.12/dist-packages (from torchvision==0.20.1) (11.3.0)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy==1.13.1->torch==2.5.1) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch==2.5.1) (3.0.3)\n",
"Collecting triton==3.4.0\n",
" Using cached triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.7 kB)\n",
"Requirement already satisfied: setuptools>=40.8.0 in /usr/local/lib/python3.12/dist-packages (from triton==3.4.0) (75.2.0)\n",
"Using cached triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (155.6 MB)\n",
"Installing collected packages: triton\n",
" Attempting uninstall: triton\n",
" Found existing installation: triton 3.1.0\n",
" Uninstalling triton-3.1.0:\n",
" Successfully uninstalled triton-3.1.0\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"torch 2.5.1+cu124 requires triton==3.1.0; platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\", but you have triton 3.4.0 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed triton-3.4.0\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.colab-display-data+json": {
"pip_warning": {
"packages": [
"triton"
]
},
"id": "87155ff16eb245418e68e56aaae7ac7f"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: mlstm-kernels==2.0.1 in /usr/local/lib/python3.12/dist-packages (2.0.1)\n",
"Requirement already satisfied: xlstm==2.0.5 in /usr/local/lib/python3.12/dist-packages (2.0.5)\n",
"Requirement already satisfied: dacite in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (1.9.2)\n",
"Requirement already satisfied: einops in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (0.8.1)\n",
"Requirement already satisfied: ipykernel in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (6.17.1)\n",
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (3.10.0)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (2.0.2)\n",
"Requirement already satisfied: omegaconf in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (2.3.0)\n",
"Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (13.9.4)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (2.5.1+cu124)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from mlstm-kernels==2.0.1) (4.67.1)\n",
"Requirement already satisfied: opt_einsum in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (3.4.0)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (4.57.0)\n",
"Requirement already satisfied: reportlab in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (4.4.4)\n",
"Requirement already satisfied: joypy in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (0.2.6)\n",
"Requirement already satisfied: ftfy in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (6.3.1)\n",
"Requirement already satisfied: ninja in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (1.13.0)\n",
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (0.35.3)\n",
"Requirement already satisfied: tokenizers in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (0.22.1)\n",
"Requirement already satisfied: seaborn in /usr/local/lib/python3.12/dist-packages (from xlstm==2.0.5) (0.13.2)\n",
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.12/dist-packages (from ftfy->xlstm==2.0.5) (0.2.14)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (3.20.0)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (2025.3.0)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (25.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (6.0.3)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (2.32.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (4.15.0)\n",
"Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub->xlstm==2.0.5) (1.1.10)\n",
"Requirement already satisfied: debugpy>=1.0 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (1.8.15)\n",
"Requirement already satisfied: ipython>=7.23.1 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (7.34.0)\n",
"Requirement already satisfied: jupyter-client>=6.1.12 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (7.4.9)\n",
"Requirement already satisfied: matplotlib-inline>=0.1 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (0.1.7)\n",
"Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (1.6.0)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (5.9.5)\n",
"Requirement already satisfied: pyzmq>=17 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (26.2.1)\n",
"Requirement already satisfied: tornado>=6.1 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (6.4.2)\n",
"Requirement already satisfied: traitlets>=5.1.0 in /usr/local/lib/python3.12/dist-packages (from ipykernel->mlstm-kernels==2.0.1) (5.7.1)\n",
"Requirement already satisfied: scipy>=0.11.0 in /usr/local/lib/python3.12/dist-packages (from joypy->xlstm==2.0.5) (1.16.2)\n",
"Requirement already satisfied: pandas>=0.20.0 in /usr/local/lib/python3.12/dist-packages (from joypy->xlstm==2.0.5) (2.2.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (1.3.3)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (4.60.1)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (1.4.9)\n",
"Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (11.3.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (3.2.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.12/dist-packages (from matplotlib->mlstm-kernels==2.0.1) (2.9.0.post0)\n",
"Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.12/dist-packages (from omegaconf->mlstm-kernels==2.0.1) (4.9.3)\n",
"Requirement already satisfied: charset-normalizer in /usr/local/lib/python3.12/dist-packages (from reportlab->xlstm==2.0.5) (3.4.3)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->mlstm-kernels==2.0.1) (4.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich->mlstm-kernels==2.0.1) (2.19.2)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (3.5)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (3.1.6)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (9.1.0.70)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.4.5.8)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (11.2.1.3)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (10.3.5.147)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (11.6.1.9)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.3.1.170)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (2.21.5)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.4.127)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (12.4.127)\n",
"Collecting triton==3.1.0 (from torch->mlstm-kernels==2.0.1)\n",
" Using cached triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (75.2.0)\n",
"Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.12/dist-packages (from torch->mlstm-kernels==2.0.1) (1.13.1)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy==1.13.1->torch->mlstm-kernels==2.0.1) (1.3.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers->xlstm==2.0.5) (2024.11.6)\n",
"Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers->xlstm==2.0.5) (0.6.2)\n",
"Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.12/dist-packages (from ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (0.19.2)\n",
"Requirement already satisfied: decorator in /usr/local/lib/python3.12/dist-packages (from ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (4.4.2)\n",
"Requirement already satisfied: pickleshare in /usr/local/lib/python3.12/dist-packages (from ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (0.7.5)\n",
"Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (3.0.52)\n",
"Requirement already satisfied: backcall in /usr/local/lib/python3.12/dist-packages (from ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (0.2.0)\n",
"Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.12/dist-packages (from ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (4.9.0)\n",
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.12/dist-packages (from jupyter-client>=6.1.12->ipykernel->mlstm-kernels==2.0.1) (0.4)\n",
"Requirement already satisfied: jupyter-core>=4.9.2 in /usr/local/lib/python3.12/dist-packages (from jupyter-client>=6.1.12->ipykernel->mlstm-kernels==2.0.1) (5.8.1)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->mlstm-kernels==2.0.1) (0.1.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas>=0.20.0->joypy->xlstm==2.0.5) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas>=0.20.0->joypy->xlstm==2.0.5) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.7->matplotlib->mlstm-kernels==2.0.1) (1.17.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch->mlstm-kernels==2.0.1) (3.0.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub->xlstm==2.0.5) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub->xlstm==2.0.5) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub->xlstm==2.0.5) (2025.10.5)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.12/dist-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (0.8.5)\n",
"Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.12/dist-packages (from jupyter-core>=4.9.2->jupyter-client>=6.1.12->ipykernel->mlstm-kernels==2.0.1) (4.5.0)\n",
"Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.12/dist-packages (from pexpect>4.3->ipython>=7.23.1->ipykernel->mlstm-kernels==2.0.1) (0.7.0)\n",
"Using cached triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.6 MB)\n",
"Installing collected packages: triton\n",
" Attempting uninstall: triton\n",
" Found existing installation: triton 3.4.0\n",
" Uninstalling triton-3.4.0:\n",
" Successfully uninstalled triton-3.4.0\n",
"Successfully installed triton-3.1.0\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.colab-display-data+json": {
"pip_warning": {
"packages": [
"triton"
]
},
"id": "0c09674fc7754dce92867e28c21fe7a0"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"torch 2.5.1+cu124\n",
"triton 3.1.0\n",
"mlstm_kernels 2.0.1\n",
"xlstm 2.0.5\n",
"available kernels: ['chunkwise--native_autograd', 'chunkwise--native_custbw', 'chunkwise--triton_limit_chunk', 'chunkwise--triton_xl_chunk', 'chunkwise--triton_xl_chunk_siging', 'parallel--native_autograd', 'parallel--native_custbw', 'parallel--native_stablef_autograd', 'parallel--native_stablef_custbw', 'parallel--triton_limit_headdim', 'parallel--native_siging_autograd', 'parallel--native_siging_custbw']\n",
"\n",
"=== REPRO 1: HF xLSTM with return_last_states=False ===\n",
"Expected ERROR: ValueError not enough values to unpack (expected 2, got 1)\n",
"=== FIX 1: HF xLSTM with return_last_states=True ===\n",
"OK: forward with return_last_states=True\n",
"\n",
"=== REPRO 2: Triton 'xl_chunk' with S=6 (expect constraint failure) ===\n",
"Expected ERROR: AssertionError Sequence length must be divisible by 16.\n",
"=== FIX 2A: pad to S%16==0 for xl_chunk, then slice back ===\n",
"OK xl_chunk padded: (1, 8, 16, 16) -> keep only first 6\n",
"OK limit_chunk padded: (1, 8, 6, 16)\n",
"\n",
"=== Safe training fallback: parallel--native_custbw (no Triton) ===\n",
"OK native parallel: (1, 8, 6, 16)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"\\ntorch 2.5.1+cu124\\ntriton 3.1.0\\nmlstm_kernels 2.0.1\\nxlstm 2.0.5\\navailable kernels: ['chunkwise--native_autograd', 'chunkwise--native_custbw', 'chunkwise--triton_limit_chunk', 'chunkwise--triton_xl_chunk', 'chunkwise--triton_xl_chunk_siging', 'parallel--native_autograd', 'parallel--native_custbw', 'parallel--native_stablef_autograd', 'parallel--native_stablef_custbw', 'parallel--triton_limit_headdim', 'parallel--native_siging_autograd', 'parallel--native_siging_custbw']\\n\\n=== REPRO 1: HF xLSTM with return_last_states=False ===\\nExpected ERROR: ValueError not enough values to unpack (expected 2, got 1)\\n=== FIX 1: HF xLSTM with return_last_states=True ===\\nOK: forward with return_last_states=True\\n\\n=== REPRO 2: Triton 'xl_chunk' with S=6 (expect constraint failure) ===\\nExpected ERROR: AssertionError Sequence length must be divisible by 16.\\n=== FIX 2A: pad to S%16==0 for xl_chunk, then slice back ===\\nOK xl_chunk padded: (1, 8, 16, 16) -> keep only first 6\\nOK limit_chunk padded: (1, 8, 6, 16)\\n\\n=== Safe training fallback: parallel--native_custbw (no Triton) ===\\nOK native parallel: (1, 8, 6, 16)\\n\""
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 811,
"referenced_widgets": [
"0dd10c3eecd64462acc56eae1fb82cb2",
"00e3757b55ad448ea050a8d40d9b09be",
"596645777acb4001a5da04819d7e105a",
"b388d68e27a84c84b597bab1dc0db5b7",
"bceb8ff98b6345dc830a0756e8dc37b0",
"5010a6f07ce44d8ca49b4ecf41140d54",
"515f08efb3ea4a8cbd6e8ef8481d5acb",
"b7b530b3a2a1427b8a3759614d120772",
"4e03d98ee14a4ffcb7bbe62718677602",
"847ef889c21a461fb7e77d54d0edab23",
"905292f5101949f6a63444349ba68464",
"65f21cb5cf6c431a9a36311edf3529c2",
"260c186ea38641c89509aac0177ca4fb",
"99e74f703946490eb93ecf30cd0c85b3",
"20d36aa22bb04bcca1e69abc84952189",
"66f050dd0cb94b62ae753ad4279b2f24",
"f0ed43c3d46a4920a65a02e3485fec39",
"b109e13ef458459f9313ee84d1c41cb8",
"0a90d915d85242f08bda898768f3c000",
"6840601971b9433cacf8acdc81f68620",
"f3b04b0c018e497a9308afff257deec1",
"0bab38ea74c74f8580ec5bbeeccb3df4",
"98332b54dec24f858dfab05f53904eec",
"221bc1ead32a4d67932219d21920aa07",
"33d1b4a6dd404a7db3610d76704e1b6a",
"a49c500da1904adca1261ecd4ba47feb",
"5f729ca3e83840e0b3b4bbe4f9d39437",
"de66545cc5394ccd819c42f0ab2fe03e",
"2fb52d0cdb8d4196a01bfe40a3543a74",
"8f833984323b40569ce4094a01c64d86",
"ba1757cbd6be4152af1198cdb3f4cc6e",
"39429f94fea246f5b88daea53b2bcd32",
"9736cfb561024660bd4bfad6d8242b5a"
]
},
"id": "G3PCNcYfQBhB",
"outputId": "ae2b8b07-0450-4ba8-8c70-26c8708f4555"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--- RUNNING INFERENCE ON THE FINAL MERGED MODEL ---\n",
"Defining a safe, native kernel configuration for compatibility...\n",
"Loading the final, merged model in bfloat16 (no quantization for compatibility)...\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Fetching 3 files: 0%| | 0/3 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "0dd10c3eecd64462acc56eae1fb82cb2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "65f21cb5cf6c431a9a36311edf3529c2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"chat_template.jinja: 0%| | 0.00/3.43k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "98332b54dec24f858dfab05f53904eec"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Padding token has been set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"xLSTMForCausalLM(\n",
" (backbone): xLSTMModel(\n",
" (embeddings): Embedding(50560, 4096)\n",
" (blocks): ModuleList(\n",
" (0-31): 32 x mLSTMBlock(\n",
" (norm_mlstm): RMSNorm()\n",
" (mlstm_layer): mLSTMLayer(\n",
" (q): Linear(in_features=4096, out_features=2048, bias=False)\n",
" (k): Linear(in_features=4096, out_features=2048, bias=False)\n",
" (v): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (ogate_preact): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (igate_preact): Linear(in_features=4096, out_features=8, bias=True)\n",
" (fgate_preact): Linear(in_features=4096, out_features=8, bias=True)\n",
" (ogate_act_fn): Sigmoid()\n",
" (mlstm_backend): mLSTMBackend(mLSTMBackendConfig(chunkwise_kernel='chunkwise--native_autograd', sequence_kernel='native_sequence__native', step_kernel='native', mode='inference', chunk_size=64, return_last_states=False, autocast_kernel_dtype='bfloat16', eps=1e-06, inference_state_dtype='float32', normalize_siging=True))\n",
" (multihead_norm): MultiHeadLayerNorm()\n",
" (out_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" )\n",
" (norm_ffn): RMSNorm()\n",
" (ffn): FeedForward(\n",
" (proj_up_gate): Linear(in_features=4096, out_features=10944, bias=False)\n",
" (proj_up): Linear(in_features=4096, out_features=10944, bias=False)\n",
" (proj_down): Linear(in_features=10944, out_features=4096, bias=False)\n",
" (act_fn): SiLU()\n",
" )\n",
" )\n",
" )\n",
" (out_norm): RMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=50560, bias=False)\n",
")"
]
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"import torch\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig\n",
"\n",
"MERGED_MODEL_PATH = \"ethicalabs/xLSTM-7b-Instruct\"\n",
"\n",
"print(\"--- RUNNING INFERENCE ON THE FINAL MERGED MODEL ---\")\n",
"\n",
"# Define the \"Key Config\" for Maximum Compatibility\n",
"# We apply a configuration that uses native, hardware-agnostic kernels.\n",
"print(\"Defining a safe, native kernel configuration for compatibility...\")\n",
"safe_config = AutoConfig.from_pretrained(MERGED_MODEL_PATH, trust_remote_code=True)\n",
"# Use the stable, native parallel kernel\n",
"safe_config.chunkwise_kernel = \"chunkwise--native_autograd\"\n",
"safe_config.sequence_kernel = \"native_sequence__native\"\n",
"safe_config.step_kernel = \"native\"\n",
"# This flag is still required for the HF implementation to avoid unpacking errors\n",
"safe_config.return_last_states = False\n",
"\n",
"# Step 2: Load the final, merged model with the safe config (no quantization)\n",
"print(\"Loading the final, merged model in bfloat16 (no quantization for compatibility)...\")\n",
"final_model = AutoModelForCausalLM.from_pretrained(\n",
" MERGED_MODEL_PATH,\n",
" device_map=\"auto\",\n",
" torch_dtype=torch.bfloat16,\n",
" trust_remote_code=True,\n",
" config=safe_config\n",
")\n",
"final_tokenizer = AutoTokenizer.from_pretrained(MERGED_MODEL_PATH)\n",
"\n",
"# The tokenizer needs to know which token to use for padding.\n",
"if final_tokenizer.pad_token is None:\n",
" final_tokenizer.pad_token = final_tokenizer.eos_token\n",
"print(\"Padding token has been set.\")\n",
"\n",
"\n",
"# Set the model to evaluation mode, which is standard practice for inference\n",
"final_model.eval()"
]
},
{
"cell_type": "code",
"source": [
"# Step 3: Run Inference\n",
"print(\"Preparing prompt and running generation...\")\n",
"messages = [{\"role\": \"user\", \"content\": \"Please suggest me some mindfullness relaxation techniques to overcome the frustration I have when I deal with Triton kernels.\"}]\n",
"\n",
"prompt_string = final_tokenizer.apply_chat_template(\n",
" messages, add_generation_prompt=True, tokenize=False\n",
")\n",
"\n",
"inputs = final_tokenizer(\n",
" prompt_string,\n",
" return_tensors=\"pt\"\n",
").to(final_model.device)\n",
"\n",
"with torch.no_grad():\n",
" outputs = final_model.generate(**inputs, max_new_tokens=512)\n",
" output_ids = outputs[0][len(inputs.input_ids[0]) :]\n",
"\n",
"response_text = final_tokenizer.decode(output_ids, skip_special_tokens=True)\n",
"print(\"\\n--- Generated Response ---\")\n",
"print(response_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4sO5_6kDXmtO",
"outputId": "d3974dfe-5fbe-4c09-d88e-3dde3444620b"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Preparing prompt and running generation...\n",
"\n",
"--- Generated Response ---\n",
"1. Mindful breathing: Take a few deep breaths and focus on your breath. Feel the air entering and leaving your body. This can help you calm down and focus on the present moment.\n",
"\n",
"2. Body scan: Lie down or sit comfortably and focus on each part of your body, starting from your toes and moving up to your head. Notice any tension or discomfort and try to release it.\n",
"\n",
"3. Visualization: Close your eyes and imagine yourself in a peaceful place, such as a beach or forest. Focus on the sights, sounds, and smells around you. This can help you relax and reduce stress.\n",
"\n",
"4. Progressive muscle relaxation: Tense and then relax each muscle group in your body, starting from your toes and moving up to your head. This can help you release tension and relax your body.\n",
"\n",
"5. Mantra repetition: Repeat a calming phrase or mantra to yourself, such as \"I am calm\" or \"I am relaxed.\" This can help you focus your mind and reduce stress.\n",
"\n",
"6. Walking meditation: Take a slow, mindful walk and focus on each step you take. Notice the feeling of your feet touching the ground and the movement of your legs. This can help you reduce stress and increase mindfulness.\n",
"\n",
"7. Yoga: Practice yoga poses that focus on relaxation and mindfulness, such as child's pose, downward facing dog, and savasana. This can help you reduce stress and increase mindfulness.\n",
"\n",
"8. Guided meditation: Listen to a guided meditation or relaxation exercise. This can help you focus your mind and reduce stress.\n",
"\n",
"9. Mindful eating: Take a few deep breaths and focus on the taste, texture, and smell of your food. This can help you reduce stress and increase mindfulness.\n",
"\n",
"10. Nature walk: Take a slow, mindful walk in nature and focus on the sights, sounds, and smells around you. This can help you reduce stress and increase mindfulness.\n",
"\n",
"Remember to practice these techniques regularly to reduce stress and increase mindfulness.\n",
"\n",
"You can also try using apps such as Headspace, Calm, or Insight Timer to help you practice mindfulness and relaxation techniques. These apps offer guided meditations, breathing exercises, and other mindfulness practices.\n",
"\n",
"Remember to be patient and consistent with your practice, and don't be afraid to try different techniques to find what works best for you.\n",
"\n",
"You can also try seeking support from a therapist or counselor if you are struggling with\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment