Databricks deployment file
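This appears to be a dbx-style deployment configuration for a Databricks model pipeline: reusable cluster definitions live under `custom:` as YAML anchors, `build:` selects the Python packaging tool, and `environments:` maps named environments (default, dev, two prod variants, and per-step shortcuts) to Databricks Jobs workflow definitions.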
```yaml
custom:
  mm_tags: &mm-tags
    "Application Type": "Back End"
    Product: Optimization
    "Sub Department": "Data Science"
    Department: Engineering
    "Service Name": cmv
    "Repo Name": cmv-st
    "Purpose": prod
    "Category": Production

  mm_dev_tags: &mm-dev-tags
    "Application Type": "Back End"
    Product: Optimization
    "Sub Department": "Data Science"
    Department: Engineering
    "Service Name": cmv
    "Repo Name": cmv-st
    "Purpose": dev
    "Category": Development
```
```yaml
  model-cluster-props: &model-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "i3.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    spark_conf:
      spark.master: "local[*, 4]"
      spark.databricks.cluster.profile: "singleNode"
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
    custom_tags:
      <<: *mm-tags

  model-dev-cluster-props: &model-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "i3.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    spark_conf:
      spark.master: "local[*, 4]"
      spark.databricks.cluster.profile: "singleNode"
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
    custom_tags:
      <<: *mm-dev-tags
```
```yaml
  etl-cluster-props: &etl-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    aws_attributes:
      "first_on_demand": 1
      "availability": "SPOT_WITH_FALLBACK"
      "zone_id": "us-east-1e"
      "spot_bid_price_percent": 100
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags

  etl-dev-cluster-props: &etl-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.4xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    aws_attributes:
      "first_on_demand": 1
      "availability": "SPOT_WITH_FALLBACK"
      "zone_id": "us-east-1e"
      "spot_bid_price_percent": 100
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-dev-tags
```
```yaml
  curve-cluster-props: &curve-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.2xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    driver_node_type_id: "r4.8xlarge"
    spark_conf:
      spark.driver.maxResultSize: 0
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-tags

  curve-dev-cluster-props: &curve-dev-cluster-props
    spark_version: "10.4.x-cpu-ml-scala2.12"
    node_type_id: "r4.2xlarge"
    init_scripts:
      - dbfs:
          "destination": "dbfs:/databricks/install_lzo_and_configure.sh"
    driver_node_type_id: "r4.8xlarge"
    spark_conf:
      spark.driver.maxResultSize: 0
    aws_attributes:
      "first_on_demand": 1
      "availability": "ON_DEMAND"
      "zone_id": "us-east-1e"
      "ebs_volume_type": "GENERAL_PURPOSE_SSD"
      "ebs_volume_count": 1
      "ebs_volume_size": 100
    custom_tags:
      <<: *mm-dev-tags
```
```yaml
  etl-auto-scale-props: &etl-auto-scale-props
    autoscale:
      min_workers: 2
      max_workers: 8

  curve-auto-scale-props: &curve-auto-scale-props
    autoscale:
      min_workers: 2
      max_workers: 8

  etl-static-cluster: &etl-static-cluster
    new_cluster:
      <<: *etl-cluster-props
      num_workers: 2

  model-static-cluster: &model-static-cluster
    new_cluster:
      <<: *model-cluster-props
      num_workers: 0

  model-dev-static-cluster: &model-dev-static-cluster
    new_cluster:
      <<: *model-dev-cluster-props
      num_workers: 0

  etl-autoscale-cluster: &etl-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *etl-cluster-props
        - *etl-auto-scale-props

  etl-dev-autoscale-cluster: &etl-dev-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *etl-dev-cluster-props
        - *etl-auto-scale-props

  curve-autoscale-cluster: &curve-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *curve-cluster-props
        - *curve-auto-scale-props

  curve-dev-autoscale-cluster: &curve-dev-autoscale-cluster
    new_cluster:
      <<: # merge these two maps and place them here.
        - *curve-dev-cluster-props
        - *curve-auto-scale-props
```
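The autoscale cluster entries rely on the merge key accepting a sequence of aliases, merging several anchored maps at once. Per the YAML merge-key specification, when the same key appears in more than one map, the map listed earlier in the sequence wins; the cluster-props and auto-scale-props maps here share no keys, so order is immaterial in this file. A minimal sketch with illustrative keys:

```yaml
a: &a
  x: 1
b: &b
  x: 2
  y: 3
merged:
  <<: [ *a, *b ]   # result: x: 1 (from *a, listed first), y: 3
```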
```yaml
build:
  python: "poetry"

environments:
  default:
    workflows:
      - name: "bidstat-reader"
        <<: *etl-dev-autoscale-cluster
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        spark_python_task:
          python_file: "file://src/bidstat_reader.py"
      - name: "experiment"
        <<: *model-dev-static-cluster
        max_concurrent_runs: 3
        spark_python_task:
          python_file: "file://src/experiment_tf.py"
      - name: "curve"
        <<: *curve-dev-autoscale-cluster
        max_concurrent_runs: 3
        spark_python_task:
          python_file: "file://src/curve.py"
      - name: "evaluate"
        <<: *curve-dev-autoscale-cluster
        spark_python_task:
          python_file: "file://src/evaluate_tf.py"
      - name: "predict"
        <<: *model-dev-static-cluster
        spark_python_task:
          python_file: "file://src/predict.py"
      - name: "model_plot"
        <<: *model-dev-static-cluster
        spark_python_task:
          python_file: "file://src/model_plot.py"
```
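If this is a dbx file, the `build:` section selects Poetry (`python: "poetry"`) as the tool that packages the project before deployment. The `default` environment then exposes each pipeline step as a standalone single-task workflow on dev-tagged clusters, convenient for running one stage in isolation; the `dev` and `prod_model_*` environments below assemble the same scripts into `MULTI_TASK` jobs that share a set of named `job_clusters`.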
```yaml
  dev:
    workflows:
      - name: "cmv3-dev"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-dev-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-dev-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-dev-autoscale-cluster
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_dev.yaml" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_dev.yaml" ]
            depends_on:
              - task_key: "copy-artifacts"
```
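The `depends_on` entries in `cmv3-dev` chain the six tasks into the following DAG, fanning out after curve-building, with the plot gated on the artifact copy:

```
bidstat-reader ──> model-generation ──> curve-building ──┬──> evaluate-results
                                                         └──> copy-artifacts ──> model-plot
```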
```yaml
  prod_model_a:
    workflows:
      - name: "cmv3-prod-model-a"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-autoscale-cluster
        max_concurrent_runs: 2
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_cb_prod_310.yaml", "--start_date", "T-4" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_cb_prod_310.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "copy-artifacts"

  prod_model_b:
    workflows:
      - name: "cmv3-prod-model-b"
        format: MULTI_TASK
        email_notifications:
          on_start: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_success: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          on_failure: [ "asadagopan@mediamath.com", "gstanje@mediamath.com", "wronsiek@mediamath.com" ]
          no_alert_for_skipped_runs: false
        job_clusters:
          - job_cluster_key: "etl-cluster"
            <<: *etl-autoscale-cluster
          - job_cluster_key: "model-cluster"
            <<: *model-static-cluster
          - job_cluster_key: "curve-cluster"
            <<: *curve-autoscale-cluster
        max_concurrent_runs: 2
        tasks:
          - task_key: "bidstat-reader"
            job_cluster_key: "etl-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/bidstat_reader.py"
              parameters: [ "--config-file", "generate_data_cb_prod_320.yaml", "--start_date", "T-7" ]
          - task_key: "model-generation"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/experiment_tf.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "bidstat-reader"
          - task_key: "curve-building"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/curve.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "model-generation"
          - task_key: "evaluate-results"
            job_cluster_key: "curve-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/evaluate_tf.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "copy-artifacts"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/serving_artifacts.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "curve-building"
          - task_key: "model-plot"
            job_cluster_key: "model-cluster"
            max_retries: 0
            spark_python_task:
              python_file: "file://src/model_plot.py"
              parameters: [ "--config-file", "model_cb_prod_320.yaml", "--model_date", "T-1" ]
            depends_on:
              - task_key: "copy-artifacts"
```
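`cmv3-prod-model-a` and `cmv3-prod-model-b` are structurally identical and reuse the same task DAG as `cmv3-dev`; they differ only in the config files passed to each task (`*_cb_prod_310.yaml` vs. `*_cb_prod_320.yaml`) and in the bidstat lookback window (`--start_date T-4` vs. `T-7`). Both run on the prod-tagged cluster anchors and cap `max_concurrent_runs` at 2.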
```yaml
  data:
    workflows:
      - name: "bidstat-reader"
        spark_python_task:
          python_file: "file://src/bidstat_reader.py"
  experiment:
    workflows:
      - name: "experiment"
        spark_python_task:
          python_file: "file://src/experiment_tf.py"
  curve:
    workflows:
      - name: "curve"
        spark_python_task:
          python_file: "file://src/curve.py"
  evaluate:
    workflows:
      - name: "evaluate"
        spark_python_task:
          python_file: "file://src/evaluate_tf.py"
  predict:
    workflows:
      - name: "predict"
        spark_python_task:
          python_file: "file://src/predict.py"
  model_plot:
    workflows:
      - name: "model_plot"
        spark_python_task:
          python_file: "file://src/model_plot.py"
```
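The trailing `data`, `experiment`, `curve`, `evaluate`, `predict`, and `model_plot` environments each expose a single script with no cluster or notification settings, presumably for ad-hoc runs of individual steps. Assuming the dbx CLI (exact flags vary across versions), deploying and triggering the dev job would look something like `dbx deploy --environment dev` followed by `dbx launch cmv3-dev --environment dev`.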