Databricks deployment file
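This appears to be a dbx-style deployment configuration for a Databricks model pipeline: reusable cluster definitions live under `custom:` as YAML anchors, `build:` selects the Python packaging tool, and `environments:` maps named environments (default, dev, two prod variants, and per-step shortcuts) to Databricks Jobs workflow definitions.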
```yaml
custom:
  mm_tags: &mm-tags
    "Application Type": "Back End"
    Product: Optimization
    "Sub Department": "Data Science"
    Department: Engineering
    "Service Name": cmv
    "Repo Name": cmv-st
    "Purpose": prod
    "Category": Production

  mm_dev_tags: &mm-dev-tags
    "Application Type": "Back End"
    Product: Optimization
    "Sub Department": "Data Science"
    Department: Engineering
    "Service Name": cmv
    "Repo Name": cmv-st
    "Purpose": dev
    "Category": Development
```
```yaml
  model-cluster-props: &model-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "i3.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    spark_conf:
      spark.master: "local[*, 4]"
      spark.databricks.cluster.profile: "singleNode"
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
    custom_tags:
      <<: *mm-tags

  model-dev-cluster-props: &model-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "i3.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    spark_conf:
      spark.master: "local[*, 4]"
      spark.databricks.cluster.profile: "singleNode"
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
    custom_tags:
      <<: *mm-dev-tags
```
```yaml
  etl-cluster-props: &etl-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    aws_attributes:
      "first_on_demand": 1
      "availability": "SPOT_WITH_FALLBACK"
      "zone_id": "us-east-1e"
      "spot_bid_price_percent": 100
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags

  etl-dev-cluster-props: &etl-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    aws_attributes:
      "first_on_demand": 1
      "availability": "SPOT_WITH_FALLBACK"
      "zone_id": "us-east-1e"
      "spot_bid_price_percent": 100
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-dev-tags
```
```yaml
  curve-cluster-props: &curve-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.2xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    driver_node_type_id: "r4.8xlarge"
    spark_conf:
      spark.driver.maxResultSize: 0
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags

  curve-dev-cluster-props: &curve-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.2xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    driver_node_type_id: "r4.8xlarge"
    spark_conf:
      spark.driver.maxResultSize: 0
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-dev-tags
```
```yaml
  etl-auto-scale-props: &etl-auto-scale-props
    autoscale:
      min_workers: 2
      max_workers: 8

  curve-auto-scale-props: &curve-auto-scale-props
    autoscale:
      min_workers: 2
      max_workers: 8

  etl-static-cluster: &etl-static-cluster
    new_cluster:
      <<: *etl-cluster-props
      num_workers: 2

  model-static-cluster: &model-static-cluster
    new_cluster:
      <<: *model-cluster-props
      num_workers: 0

  model-dev-static-cluster: &model-dev-static-cluster
    new_cluster:
      <<: *model-dev-cluster-props
      num_workers: 0

  etl-autoscale-cluster: &etl-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *etl-cluster-props
        - *etl-auto-scale-props

  etl-dev-autoscale-cluster: &etl-dev-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *etl-dev-cluster-props
        - *etl-auto-scale-props

  curve-autoscale-cluster: &curve-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *curve-cluster-props
        - *curve-auto-scale-props

  curve-dev-autoscale-cluster: &curve-dev-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *curve-dev-cluster-props
        - *curve-auto-scale-props
```
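The autoscale cluster entries rely on the merge key accepting a sequence of aliases, merging several anchored maps at once. Per the YAML merge-key specification, when the same key appears in more than one map, the map listed earlier in the sequence wins; the cluster-props and auto-scale-props maps here share no keys, so order is immaterial in this file. A minimal sketch with illustrative keys:

```yaml
a: &a
  x: 1
b: &b
  x: 2
  y: 3
merged:
  <<: [ *a, *b ]   # result: x: 1 (from *a, listed first), y: 3
```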
```yaml
build:
  python: "poetry"

environments:
  default:
    workflows:
      - name: "bidstat-reader"
        <<: *etl-dev-autoscale-cluster
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        spark_python_task:
          python_file: "file://src/bidstat_reader.py"
      - name: "experiment"
        <<: *model-dev-static-cluster
        max_concurrent_runs: 3
        spark_python_task:
          python_file: "file://src/experiment_tf.py"
      - name: "curve"
        <<: *curve-dev-autoscale-cluster
        max_concurrent_runs: 3
        spark_python_task:
          python_file: "file://src/curve.py"
      - name: "evaluate"
        <<: *curve-dev-autoscale-cluster
        spark_python_task:
          python_file: "file://src/evaluate_tf.py"
      - name: "predict"
        <<: *model-dev-static-cluster
        spark_python_task:
          python_file: "file://src/predict.py"
      - name: "model_plot"
        <<: *model-dev-static-cluster
        spark_python_task:
          python_file: "file://src/model_plot.py"
```
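If this is a dbx file, the `build:` section selects Poetry (`python: "poetry"`) as the tool that packages the project before deployment. The `default` environment then exposes each pipeline step as a standalone single-task workflow on dev-tagged clusters, convenient for running one stage in isolation; the `dev` and `prod_model_*` environments below assemble the same scripts into `MULTI_TASK` jobs that share a set of named `job_clusters`.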
```yaml
  dev:
    workflows:
      - name: "cmv3-dev"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-dev-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-dev-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-dev-autoscale-cluster
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_dev.yaml" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "copy-artifacts"
```
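The `depends_on` entries in `cmv3-dev` chain the six tasks into the following DAG, fanning out after curve-building, with the plot gated on the artifact copy:

```
bidstat-reader ──> model-generation ──> curve-building ──┬──> evaluate-results
                                                         └──> copy-artifacts ──> model-plot
```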
```yaml
  prod_model_a:
    workflows:
      - name: "cmv3-prod-model-a"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-autoscale-cluster
        max_concurrent_runs: 2
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_cb_prod_310.yaml", "--start_date", "T-4" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "copy-artifacts"

  prod_model_b:
    workflows:
      - name: "cmv3-prod-model-b"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-autoscale-cluster
        max_concurrent_runs: 2
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_cb_prod_320.yaml", "--start_date", "T-7" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "copy-artifacts"
```
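`cmv3-prod-model-a` and `cmv3-prod-model-b` are structurally identical and reuse the same task DAG as `cmv3-dev`; they differ only in the config files passed to each task (`*_cb_prod_310.yaml` vs. `*_cb_prod_320.yaml`) and in the bidstat lookback window (`--start_date T-4` vs. `T-7`). Both run on the prod-tagged cluster anchors and cap `max_concurrent_runs` at 2.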
```yaml
  data:
    workflows:
      - name: "bidstat-reader"
        spark_python_task:
          python_file: "file://src/bidstat_reader.py"
  experiment:
    workflows:
      - name: "experiment"
        spark_python_task:
          python_file: "file://src/experiment_tf.py"
  curve:
    workflows:
      - name: "curve"
        spark_python_task:
          python_file: "file://src/curve.py"
  evaluate:
    workflows:
      - name: "evaluate"
        spark_python_task:
          python_file: "file://src/evaluate_tf.py"
  predict:
    workflows:
      - name: "predict"
        spark_python_task:
          python_file: "file://src/predict.py"
  model_plot:
    workflows:
      - name: "model_plot"
        spark_python_task:
          python_file: "file://src/model_plot.py"
```
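The trailing `data`, `experiment`, `curve`, `evaluate`, `predict`, and `model_plot` environments each expose a single script with no cluster or notification settings, presumably for ad-hoc runs of individual steps. Assuming the dbx CLI (exact flags vary across versions), deploying and triggering the dev job would look something like `dbx deploy --environment dev` followed by `dbx launch cmv3-dev --environment dev`.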