Skip to content

Instantly share code, notes, and snippets.

@Davidnet
Created January 27, 2026 17:59
Show Gist options
  • Select an option

  • Save Davidnet/e90e7dd2629547587b90397d5fb93bcc to your computer and use it in GitHub Desktop.

Select an option

Save Davidnet/e90e7dd2629547587b90397d5fb93bcc to your computer and use it in GitHub Desktop.
import time
from metaflow import FlowSpec, step, Parameter
class MockPIITrainingPipeline(FlowSpec):
    """
    Mock pipeline for testing Flow orchestration without heavy dependencies or GPU.

    Every step only prints a message and sleeps for five seconds, so the flow
    exercises step ordering and transitions without doing real work.

    Step order: start -> [export_data] -> preprocess_data -> train_model
    -> evaluate_model -> [quantize_model] -> sign_model -> end, where the
    bracketed steps are selected by the ``skip_export`` and
    ``skip_quantization`` parameters.
    """

    skip_export = Parameter('skip_export', default=False, help="Skip export step")
    skip_quantization = Parameter('skip_quantization', default=False, help="Skip quantization step")
    # Defaults to True so the flow keeps failing in train_model exactly as it
    # did when the error was hard-coded; override it to let a run reach `end`.
    simulate_training_failure = Parameter(
        'simulate_training_failure',
        default=True,
        help="Raise a simulated error in the training step",
    )

    @step
    def start(self):
        """Entry step: go to export_data, or straight to preprocess_data when skip_export is set."""
        print("Starting Mock PII Training Pipeline...")
        time.sleep(5)
        # Dict-based transition: the value of the attribute named by
        # `condition` selects which step runs next.
        # NOTE(review): relies on Metaflow's conditional-transition support;
        # confirm the installed Metaflow version provides it.
        self.next(
            {False: self.export_data, True: self.preprocess_data},
            condition="skip_export",
        )

    @step
    def export_data(self):
        """Stand-in for the data export; only prints and sleeps."""
        print("Mock: Exporting data from Label Studio...")
        time.sleep(5)
        self.next(self.preprocess_data)

    @step
    def preprocess_data(self):
        """Stand-in for preprocessing; only prints and sleeps."""
        print("Mock: Preprocessing training data...")
        time.sleep(5)
        self.next(self.train_model)

    @step
    def train_model(self):
        """
        Stand-in for training: store fixed metrics, then optionally fail.

        Raises:
            RuntimeError: when ``simulate_training_failure`` is true, to
                exercise how the orchestration reacts to a failing step.
        """
        print("Mock: Training the PII detection model...")
        time.sleep(5)
        # Hard-coded placeholder values; nothing is actually measured.
        self.training_metrics = {
            "eval_pii_f1_weighted": 0.95,
            "training_time_seconds": 5
        }
        # The error used to be raised unconditionally, which made the
        # transition below (and every later step) unreachable.
        if self.simulate_training_failure:
            raise RuntimeError("Simulated training error for testing purposes")
        self.next(self.evaluate_model)

    @step
    def evaluate_model(self):
        """Stand-in for evaluation; go to quantize_model, or to sign_model when skip_quantization is set."""
        print("Mock: Evaluating model performance...")
        time.sleep(5)
        self.next(
            {False: self.quantize_model, True: self.sign_model},
            condition="skip_quantization",
        )

    @step
    def quantize_model(self):
        """Stand-in for quantization; only prints and sleeps."""
        print("Mock: Quantizing model to ONNX format...")
        time.sleep(5)
        self.next(self.sign_model)

    @step
    def sign_model(self):
        """Stand-in for model signing; only prints and sleeps."""
        print("Mock: Signing model with cryptographic hash...")
        time.sleep(5)
        self.next(self.end)

    @step
    def end(self):
        """Final step: print the completion message."""
        print("Mock Pipeline complete.")
        time.sleep(5)
# Script entry point: only instantiate the flow when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    MockPIITrainingPipeline()
@Davidnet
Copy link
Author

Launch with:

#!/bin/bash
# Launcher for the mock training flow.
#
# Parses --config / --help, forwards every other argument unchanged to the
# flow's `run` command, and executes the flow through `uv run`.
# Paths are relative, so invoke this from the project root.
set -e

echo "========================================"
echo "PII Detection Model Training Pipeline"
echo "========================================"
echo ""

# Arguments are collected in arrays (not strings) so that values containing
# spaces reach the flow as single arguments instead of being word-split.
CONFIG_FLAG=()
EXTRA_ARGS=()

while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
            echo "Usage: ./run_training.sh [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --config FILE   Use custom config file (default: training_config.toml)"
            echo "  -h, --help      Show this help"
            echo ""
            echo "Examples:"
            echo "  ./run_training.sh                       # Run with default config"
            echo "  ./run_training.sh --config prod.toml    # Use custom config"
            echo ""
            echo "Note: Edit training_config.toml to change epochs, batch size, etc."
            exit 0
            ;;
        --config)
            # Without this check a trailing `--config` made `shift 2` fail and
            # `set -e` terminated the script with no message at all.
            if [[ $# -lt 2 || -z "$2" ]]; then
                echo "Error: --config requires a FILE argument" >&2
                exit 1
            fi
            if [[ ! -f "$2" ]]; then
                echo "Error: config file not found: $2" >&2
                exit 1
            fi
            # Resolve to absolute path. The directory is resolved in its own
            # assignment so a failing `cd` aborts the script instead of being
            # masked by the exit status of `basename`.
            CONFIG_DIR="$(cd "$(dirname "$2")" && pwd)"
            CONFIG_PATH="$CONFIG_DIR/$(basename "$2")"
            CONFIG_FLAG=(--config config-file "$CONFIG_PATH")
            shift 2
            ;;
        *)
            # Anything unrecognised is passed through to the flow's `run` command.
            EXTRA_ARGS+=("$1")
            shift
            ;;
    esac
done

echo "Running training pipeline..."
echo ""

# Run from project root
uv run \
    python model/flows/mock_training_pipeline.py "${CONFIG_FLAG[@]}" run "${EXTRA_ARGS[@]}"

echo ""
echo "========================================"
echo "Pipeline Complete!"
echo "========================================"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment