Created
October 24, 2025 14:07
-
-
Save Yugsolanki/eea86a762d519449a6e254cc8ef84c7e to your computer and use it in GitHub Desktop.
hindi_stt_issue.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/Yugsolanki/eea86a762d519449a6e254cc8ef84c7e/hindi_stt_issue.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Installation & Extraction" | |
| ], | |
| "metadata": { | |
| "id": "bqGHCtFwgdIt" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# uv has no top-level `install` subcommand; packages are installed through `uv pip install`.\n", | |
| "!uv pip install gdown\n", | |
| "\n", | |
| "import gdown\n", | |
| "\n", | |
| "# Download the data archive from Google Drive into the current working directory.\n", | |
| "url = 'https://drive.google.com/uc?export=download&id=1tNhIbvKxc__9x9xnoI-AsOh2qMxYUwM8'\n", | |
| "output = 'colab_data.zip'\n", | |
| "gdown.download(url, output, quiet=False)\n", | |
| "\n", | |
| "# -o overwrites existing files so re-running this cell does not stop at unzip's replace prompt.\n", | |
| "!unzip -o colab_data.zip" | |
| ], | |
| "metadata": { | |
| "id": "G4MiTp2if4sT" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "NDCbYfESJ0kM" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# The extras spec is quoted like the pinned requirements so the shell never treats [asr] as a glob pattern.\n", | |
| "!uv pip install pytorch-lightning \"nemo-toolkit[asr]\" \"cuda-python==12.8.0\" \"numba-cuda==0.4.0\" \"numpy==1.26.4\" megatron megatron-core" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "---" | |
| ], | |
| "metadata": { | |
| "id": "ZPUF8whmgsAt" | |
| } | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Training" | |
| ], | |
| "metadata": { | |
| "id": "A9J1XCrDgsf4" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "pwOn35rwKx8w" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Training the model (Hindi streaming FastConformer Hybrid RNNT+CTC with SPE-Unigram)\n", | |
| "import os\n", | |
| "\n", | |
| "import lightning.pytorch as pl\n", | |
| "from omegaconf import OmegaConf\n", | |
| "\n", | |
| "from nemo.collections.asr.models import EncDecHybridRNNTCTCBPEModel\n", | |
| "from nemo.core.config import hydra_runner\n", | |
| "from nemo.utils import logging\n", | |
| "from nemo.utils.exp_manager import exp_manager\n", | |
| "from nemo.utils.exp_manager import EarlyStopping, ModelCheckpoint\n", | |
| "from nemo.utils.trainer_utils import resolve_trainer_cfg" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "gFhaWSiXK93P" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def main(cfg):\n", | |
| "    \"\"\"Train, and optionally test, a Hybrid RNNT+CTC BPE ASR model described by `cfg`.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        cfg: OmegaConf config. Reads `cfg.trainer`, `cfg.model` (including\n", | |
| "            `cfg.model.tokenizer.dir` / `.type` and the optional `cfg.model.test_ds`),\n", | |
| "            the optional `cfg.exp_manager` section and the optional top-level `cfg.name`.\n", | |
| "\n", | |
| "    Raises:\n", | |
| "        KeyboardInterrupt: re-raised after logging when the run is interrupted.\n", | |
| "        Exception: any other failure is logged and re-raised unchanged.\n", | |
| "    \"\"\"\n", | |
| "    logging.info(f'Hydra config:\\n{OmegaConf.to_yaml(cfg)}')\n", | |
| "\n", | |
| "    # exp_manager is optional; read it once so every use below agrees on that.\n", | |
| "    exp_cfg = cfg.get(\"exp_manager\", None)\n", | |
| "\n", | |
| "    try:\n", | |
| "        early_stopping_callback = EarlyStopping(\n", | |
| "            monitor=\"val_wer\",\n", | |
| "            mode=\"min\",\n", | |
| "            patience=10,\n", | |
| "            min_delta=0.0001,\n", | |
| "            verbose=True,\n", | |
| "        )\n", | |
| "\n", | |
| "        # \"${name}\" is OmegaConf interpolation syntax and is never resolved inside a plain\n", | |
| "        # Python string, so take the run name from the config here and leave only the\n", | |
| "        # metric placeholders (escaped braces) for the checkpoint callback to fill in.\n", | |
| "        run_name = cfg.get(\"name\", None) or \"model\"\n", | |
| "\n", | |
| "        # NOTE(review): exp_manager is believed to reject a trainer that already carries a\n", | |
| "        # ModelCheckpoint unless create_checkpoint_callback is false in the config - confirm.\n", | |
| "        checkpoint_callback = ModelCheckpoint(\n", | |
| "            monitor=\"val_wer\",\n", | |
| "            mode=\"min\",\n", | |
| "            save_top_k=5,\n", | |
| "            every_n_train_steps=2000,\n", | |
| "            filename=f\"{run_name}--{{reduced_train_loss:.2f}}-{{step}}\",\n", | |
| "        )\n", | |
| "\n", | |
| "        trainer = pl.Trainer(\n", | |
| "            **resolve_trainer_cfg(cfg.trainer),\n", | |
| "            callbacks=[early_stopping_callback, checkpoint_callback],\n", | |
| "        )\n", | |
| "        exp_manager(trainer, exp_cfg)\n", | |
| "        asr_model = EncDecHybridRNNTCTCBPEModel.from_config_dict(cfg.model, trainer=trainer)\n", | |
| "\n", | |
| "        asr_model.change_vocabulary(\n", | |
| "            new_tokenizer_dir=cfg.model.tokenizer.dir,\n", | |
| "            new_tokenizer_type=cfg.model.tokenizer.type,\n", | |
| "        )\n", | |
| "\n", | |
| "        logging.info(\"Model configuration AFTER all updates:\")\n", | |
| "        logging.info(asr_model.cfg)\n", | |
| "\n", | |
| "        # Initialize the weights of the model from another model, if provided via config\n", | |
| "        asr_model.maybe_init_from_pretrained_checkpoint(cfg)\n", | |
| "\n", | |
| "        trainer.fit(asr_model)\n", | |
| "\n", | |
| "        # Run the test loop only when a test manifest is configured.\n", | |
| "        if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:\n", | |
| "            if asr_model.prepare_test(trainer):\n", | |
| "                trainer.test(asr_model)\n", | |
| "\n", | |
| "        # Save the final model explicitly only when exp_manager is configured and was\n", | |
| "        # told not to create its own checkpoint callback.\n", | |
| "        if exp_cfg is not None and not exp_cfg.get(\"create_checkpoint_callback\", True):\n", | |
| "            final_save_path = os.path.join(\n", | |
| "                exp_cfg.exp_dir,\n", | |
| "                exp_cfg.name,\n", | |
| "                \"final_model.nemo\",\n", | |
| "            )\n", | |
| "            logging.info(f\"Saving final model to: {final_save_path}\")\n", | |
| "            asr_model.save_to(final_save_path)\n", | |
| "    except KeyboardInterrupt:\n", | |
| "        logging.warning(\"Training interrupted by user (Ctrl+C)\")\n", | |
| "        # This handler saves nothing itself, so the message must not claim that it does.\n", | |
| "        logging.info(\"No checkpoint is written by this handler; resume from the last checkpoint saved during training.\")\n", | |
| "        raise\n", | |
| "\n", | |
| "    except Exception as e:\n", | |
| "        logging.error(\"=\" * 80)\n", | |
| "        logging.error(f\"Training failed with error: {e}\")\n", | |
| "        logging.error(\"=\" * 80)\n", | |
| "        raise" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "hk-m_VLkNz-9" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from omegaconf import OmegaConf\n", | |
| "\n", | |
| "# Training config path; presumably extracted from colab_data.zip - verify the archive layout.\n", | |
| "CONFIG_PATH = \"/content/colab_data/config.yaml\"\n", | |
| "\n", | |
| "cfg = OmegaConf.load(CONFIG_PATH)\n", | |
| "main(cfg)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "accelerator": "GPU", | |
| "colab": { | |
| "gpuType": "T4", | |
| "provenance": [], | |
| "mount_file_id": "1_1IYEQ4UtTDwYDxzhMM3wQ0U5tT9gZqu", | |
| "authorship_tag": "ABX9TyP4soCKrbX8v6yuwVc3V8fw", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment