Skip to content

Instantly share code, notes, and snippets.

@Yugsolanki
Created October 24, 2025 14:07
Show Gist options
  • Select an option

  • Save Yugsolanki/eea86a762d519449a6e254cc8ef84c7e to your computer and use it in GitHub Desktop.

Select an option

Save Yugsolanki/eea86a762d519449a6e254cc8ef84c7e to your computer and use it in GitHub Desktop.
hindi_stt_issue.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/Yugsolanki/eea86a762d519449a6e254cc8ef84c7e/hindi_stt_issue.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Installation & Extraction"
],
"metadata": {
"id": "bqGHCtFwgdIt"
}
},
{
"cell_type": "code",
"source": [
"# uv exposes pip-compatible installs through `uv pip install`; it has no `uv install` subcommand.\n",
"!uv pip install gdown\n",
"\n",
"import gdown\n",
"\n",
"# Google Drive direct-download link for the archive this notebook works from.\n",
"url = 'https://drive.google.com/uc?export=download&id=1tNhIbvKxc__9x9xnoI-AsOh2qMxYUwM8'\n",
"output = 'colab_data.zip'\n",
"gdown.download(url, output, quiet=False)\n",
"\n",
"# -o overwrites existing files so re-running this cell does not stall on unzip's replace prompt.\n",
"!unzip -o colab_data.zip"
],
"metadata": {
"id": "G4MiTp2if4sT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NDCbYfESJ0kM"
},
"outputs": [],
"source": [
"# Every requirement spec containing shell-special characters is quoted, so the [asr]\n",
"# extra cannot be interpreted as a glob pattern by the shell.\n",
"!uv pip install pytorch-lightning \"nemo-toolkit[asr]\" \"cuda-python==12.8.0\" \"numba-cuda==0.4.0\" \"numpy==1.26.4\" megatron megatron-core"
]
},
{
"cell_type": "markdown",
"source": [
"---"
],
"metadata": {
"id": "ZPUF8whmgsAt"
}
},
{
"cell_type": "markdown",
"source": [
"# Training"
],
"metadata": {
"id": "A9J1XCrDgsf4"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pwOn35rwKx8w"
},
"outputs": [],
"source": [
"# Training the model (Hindi streaming FastConformer Hybrid RNNT+CTC with SPE-Unigram)\n",
"import os\n",
"\n",
"import lightning.pytorch as pl\n",
"from omegaconf import OmegaConf\n",
"\n",
"from nemo.collections.asr.models import EncDecHybridRNNTCTCBPEModel\n",
"from nemo.core.config import hydra_runner\n",
"from nemo.utils import logging\n",
"from nemo.utils.exp_manager import exp_manager\n",
"from nemo.utils.trainer_utils import resolve_trainer_cfg\n",
"from nemo.utils.exp_manager import EarlyStopping, ModelCheckpoint"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gFhaWSiXK93P"
},
"outputs": [],
"source": [
"def main(cfg):\n",
"    \"\"\"Train, and optionally test, the hybrid RNNT+CTC BPE ASR model described by ``cfg``.\n",
"\n",
"    Args:\n",
"        cfg: OmegaConf config. This function reads ``cfg.trainer``, ``cfg.model``\n",
"            (including ``model.tokenizer.dir`` / ``model.tokenizer.type`` and the optional\n",
"            ``model.test_ds``), the optional ``cfg.exp_manager`` section and the optional\n",
"            top-level ``cfg.name``.\n",
"\n",
"    Raises:\n",
"        KeyboardInterrupt: re-raised after logging when the run is interrupted.\n",
"        Exception: any other failure is logged and then re-raised unchanged.\n",
"    \"\"\"\n",
"    logging.info(f'Hydra config:\\n{OmegaConf.to_yaml(cfg)}')\n",
"\n",
"    try:\n",
"        # The exp_manager section is optional; read it once so every later use is None-safe.\n",
"        exp_cfg = cfg.get(\"exp_manager\", None)\n",
"\n",
"        # \"${name}\" is only interpolated by OmegaConf inside the config itself, never inside a\n",
"        # Python string literal, so the run name is resolved here for the checkpoint filename.\n",
"        run_name = cfg.get(\"name\", None)\n",
"        if not run_name and exp_cfg is not None:\n",
"            run_name = exp_cfg.get(\"name\", None)\n",
"        run_name = run_name or \"asr_model\"\n",
"\n",
"        # Stop once val_wer has not improved by at least min_delta for `patience` checks.\n",
"        early_stopping_callback = EarlyStopping(\n",
"            monitor=\"val_wer\",\n",
"            mode=\"min\",\n",
"            patience=10,\n",
"            min_delta=0.0001,\n",
"            verbose=True,\n",
"        )\n",
"\n",
"        checkpoint_callback = ModelCheckpoint(\n",
"            monitor=\"val_wer\",\n",
"            mode=\"min\",\n",
"            save_top_k=5,\n",
"            every_n_train_steps=2000,\n",
"            # The braced fields are left for the checkpoint callback to fill in.\n",
"            filename=run_name + \"--{reduced_train_loss:.2f}-{step}\",\n",
"        )\n",
"\n",
"        trainer = pl.Trainer(\n",
"            **resolve_trainer_cfg(cfg.trainer),\n",
"            callbacks=[early_stopping_callback, checkpoint_callback],\n",
"        )\n",
"        exp_manager(trainer, exp_cfg)\n",
"        asr_model = EncDecHybridRNNTCTCBPEModel.from_config_dict(cfg.model, trainer=trainer)\n",
"\n",
"        # NOTE(review): the model was just built from cfg.model, so this presumably re-applies\n",
"        # the same tokenizer; confirm the call is needed before the pretrained init below.\n",
"        asr_model.change_vocabulary(\n",
"            new_tokenizer_dir=cfg.model.tokenizer.dir,\n",
"            new_tokenizer_type=cfg.model.tokenizer.type,\n",
"        )\n",
"\n",
"        logging.info(\"Model configuration AFTER all updates:\")\n",
"        logging.info(asr_model.cfg)\n",
"\n",
"        # Initialize the weights of the model from another model, if provided via config\n",
"        asr_model.maybe_init_from_pretrained_checkpoint(cfg)\n",
"\n",
"        trainer.fit(asr_model)\n",
"\n",
"        # Run the test loop only when a test manifest is configured.\n",
"        if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None:\n",
"            if asr_model.prepare_test(trainer):\n",
"                trainer.test(asr_model)\n",
"\n",
"        # Export a final .nemo file here only when exp_manager was told not to create its own\n",
"        # checkpoint callback; without an exp_manager section there is no target directory.\n",
"        if exp_cfg is not None and not exp_cfg.get(\"create_checkpoint_callback\", True):\n",
"            final_save_path = os.path.join(\n",
"                exp_cfg.exp_dir,\n",
"                exp_cfg.name,\n",
"                \"final_model.nemo\",\n",
"            )\n",
"            logging.info(f\"Saving final model to: {final_save_path}\")\n",
"            asr_model.save_to(final_save_path)\n",
"    except KeyboardInterrupt:\n",
"        logging.warning(\"Training interrupted by user (Ctrl+C)\")\n",
"        # Nothing is saved in this handler, so the log message must not claim otherwise.\n",
"        logging.info(\"No checkpoint is written by this handler; resume from the latest saved checkpoint.\")\n",
"        raise\n",
"\n",
"    except Exception as e:\n",
"        logging.error(\"=\" * 80)\n",
"        logging.error(f\"Training failed with error: {str(e)}\")\n",
"        logging.error(\"=\" * 80)\n",
"        raise"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "hk-m_VLkNz-9"
},
"outputs": [],
"source": [
"from omegaconf import OmegaConf\n",
"\n",
"# Absolute path of the YAML config that drives the training run.\n",
"CONFIG_PATH = \"/content/colab_data/config.yaml\"\n",
"\n",
"cfg = OmegaConf.load(CONFIG_PATH)\n",
"\n",
"# Kick off training with the loaded config.\n",
"main(cfg)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": [],
"mount_file_id": "1_1IYEQ4UtTDwYDxzhMM3wQ0U5tT9gZqu",
"authorship_tag": "ABX9TyP4soCKrbX8v6yuwVc3V8fw",
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment