Created
October 5, 2021 08:52
-
-
Save sa-/09c8a347d0dbe864d29d1f8f66eb8e88 to your computer and use it in GitHub Desktop.
Spylon + jupyter kernel gateway container
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| FROM python:3.8-slim | |
| WORKDIR /tmp | |
| ENV SPARK_HOME=/opt/spark-3.1.2-bin-hadoop3.2 | |
| # Root stage: OS packages, Spark 3.1.2, the GCS Hadoop connector and a passwordless-sudo service_user. | |
| # - apt-get is used for both update and install (the apt front end is not meant for scripts). | |
| # - Spark is fetched from archive.apache.org; dlcdn.apache.org only hosts current releases, | |
| #   so the 3.1.2 tarball is not guaranteed to stay available there. | |
| # - /tmp is wiped at the end, which is why all downloads happen in WORKDIR /tmp. | |
| RUN apt-get update \ | |
| && apt-get install -y \ | |
| wget htop zsh git gnupg curl zip unzip vim cmake tmux sudo openjdk-11-jre \ | |
| && wget https://archive.apache.org/dist/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz \ | |
| && tar -xf spark-3.1.2-bin-hadoop3.2.tgz \ | |
| && rm spark-3.1.2-bin-hadoop3.2.tgz \ | |
| && mv spark-3.1.2-bin-hadoop3.2 /opt \ | |
| && ln -sf /opt/spark-3.1.2-bin-hadoop3.2/bin/* /usr/local/bin/ \ | |
| && wget https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar \ | |
| && mv gcs-connector-hadoop3-latest.jar "$SPARK_HOME/jars" \ | |
| && useradd -ms /bin/bash service_user \ | |
| && usermod -aG sudo service_user \ | |
| && echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers \ | |
| && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | |
| USER service_user | |
| WORKDIR /home/service_user | |
| ENV PATH=/home/service_user/.local/bin:/home/service_user/google-cloud-sdk/bin:$PATH | |
| # User stage: Python tooling (JupyterLab, kernel gateway, spylon kernel, papermill) and | |
| # Google Cloud SDK 353.0.0, all installed under /home/service_user. | |
| # - 'papermill[all]' is quoted so the shell never treats the brackets as a glob. | |
| # - printf is used for the tmux config so the "\n" is expanded regardless of which | |
| #   shell/echo implementation runs the instruction. | |
| RUN pip install jupyterlab jupyter_kernel_gateway psutil spylon-kernel gcsfs fsspec 'papermill[all]' \ | |
| && python -m spylon_kernel install --user \ | |
| && echo "export JULIA_NUM_THREADS=\$(nproc --all)" >> "$HOME/.zshrc" \ | |
| && curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-353.0.0-linux-x86_64.tar.gz \ | |
| && tar -xf google-cloud-sdk-353.0.0-linux-x86_64.tar.gz \ | |
| && rm google-cloud-sdk-353.0.0-linux-x86_64.tar.gz \ | |
| && ./google-cloud-sdk/install.sh \ | |
| && printf 'set -g mouse on\nset -g default-shell /bin/bash\n' >> ~/.tmux.conf | |
| COPY --chown=service_user entrypoint.bash /home/service_user/ | |
| COPY --chown=service_user jupyter_kernel_gateway_config.py /home/service_user/.jupyter/ | |
| COPY --chown=service_user spark-defaults.conf $SPARK_HOME/conf/ | |
| EXPOSE 8080 8888 8890 | |
| # SHELL must carry the command flag (-c); without it a shell-form instruction would be | |
| # passed to zsh as a script path instead of a command string. The exec-form ENTRYPOINT | |
| # below does not go through SHELL. | |
| SHELL [ "/bin/zsh", "-c" ] | |
| ENTRYPOINT [ "/bin/bash", "entrypoint.bash" ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /bin/bash | |
| # Container entrypoint: optionally authenticate gcloud, size the Spark driver heap | |
| # from the host's RAM, then hand the process over to the Jupyter kernel gateway. | |
| # Activate the service account only when a key-file path was supplied. | |
| # The path is quoted so that spaces in it do not split the argument. | |
| if [ -n "$GOOGLE_APPLICATION_CREDENTIALS" ]; | |
| then gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS" | |
| fi | |
| # Driver memory = 90% of total RAM minus 2 GiB, floored at 1g so small hosts do not | |
| # produce an invalid "0g" or negative setting. The line is appended only when no | |
| # uncommented spark.driver.memory entry exists yet, so restarting the container | |
| # does not pile up duplicate entries. | |
| if ! grep -q '^spark\.driver\.memory' "$SPARK_HOME/conf/spark-defaults.conf" 2>/dev/null; | |
| then python -c 'import psutil; print(f"\nspark.driver.memory {max(1, int(psutil.virtual_memory().total / 1024**3 * 0.9 - 2))}g")' \ | |
| >> "$SPARK_HOME/conf/spark-defaults.conf" | |
| fi | |
| # exec replaces this shell with the gateway so it receives signals (e.g. SIGTERM | |
| # on container stop) directly instead of via an intermediate bash process. | |
| exec jupyter kernelgateway |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Configuration file for jupyter-kernel-gateway. | |
| #------------------------------------------------------------------------------ | |
| # Application(SingletonConfigurable) configuration | |
| #------------------------------------------------------------------------------ | |
| ## This is an application. | |
| ## The date format used by logging formatters for %(asctime)s | |
| # Default: '%Y-%m-%d %H:%M:%S' | |
| # c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' | |
| ## The Logging format template | |
| # Default: '[%(name)s]%(highlevel)s %(message)s' | |
| # c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' | |
| ## Set the log level by value or name. | |
| # Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL'] | |
| # Default: 30 | |
| # c.Application.log_level = 30 | |
| ## Instead of starting the Application, dump configuration to stdout | |
| # Default: False | |
| # c.Application.show_config = False | |
| ## Instead of starting the Application, dump configuration to stdout (as JSON) | |
| # Default: False | |
| # c.Application.show_config_json = False | |
| #------------------------------------------------------------------------------ | |
| # JupyterApp(Application) configuration | |
| #------------------------------------------------------------------------------ | |
| ## Base class for Jupyter applications | |
| ## Answer yes to any prompts. | |
| # Default: False | |
| # c.JupyterApp.answer_yes = False | |
| ## Full path of a config file. | |
| # Default: '' | |
| # c.JupyterApp.config_file = '' | |
| ## Specify a config file to load. | |
| # Default: '' | |
| # c.JupyterApp.config_file_name = '' | |
| ## Generate default config file. | |
| # Default: False | |
| # c.JupyterApp.generate_config = False | |
| ## The date format used by logging formatters for %(asctime)s | |
| # See also: Application.log_datefmt | |
| # c.JupyterApp.log_datefmt = '%Y-%m-%d %H:%M:%S' | |
| ## The Logging format template | |
| # See also: Application.log_format | |
| # c.JupyterApp.log_format = '[%(name)s]%(highlevel)s %(message)s' | |
| ## Set the log level by value or name. | |
| # See also: Application.log_level | |
| # c.JupyterApp.log_level = 30 | |
| ## Instead of starting the Application, dump configuration to stdout | |
| # See also: Application.show_config | |
| # c.JupyterApp.show_config = False | |
| ## Instead of starting the Application, dump configuration to stdout (as JSON) | |
| # See also: Application.show_config_json | |
| # c.JupyterApp.show_config_json = False | |
| #------------------------------------------------------------------------------ | |
| # KernelGatewayApp(JupyterApp) configuration | |
| #------------------------------------------------------------------------------ | |
| ## Application that provisions Jupyter kernels and proxies HTTP/Websocket traffic | |
| # to the kernels. | |
| # | |
| # - reads command line and environment variable settings - initializes managers | |
| # and routes - creates a Tornado HTTP server - starts the Tornado event loop | |
| ## Sets the Access-Control-Allow-Credentials header. (KG_ALLOW_CREDENTIALS env | |
| # var) | |
| # Default: '' | |
| # c.KernelGatewayApp.allow_credentials = '' | |
| ## Sets the Access-Control-Allow-Headers header. (KG_ALLOW_HEADERS env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.allow_headers = '' | |
| ## Sets the Access-Control-Allow-Methods header. (KG_ALLOW_METHODS env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.allow_methods = '' | |
| ## Sets the Access-Control-Allow-Origin header. (KG_ALLOW_ORIGIN env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.allow_origin = '' | |
| ## Answer yes to any prompts. | |
| # See also: JupyterApp.answer_yes | |
| # c.KernelGatewayApp.answer_yes = False | |
| ## Controls which API to expose, that of a Jupyter notebook server, the seed | |
| # notebook's, or one provided by another module, respectively using values | |
| # 'kernel_gateway.jupyter_websocket', 'kernel_gateway.notebook_http', or another | |
| # fully qualified module name (KG_API env var) | |
| # Default: 'kernel_gateway.jupyter_websocket' | |
| # c.KernelGatewayApp.api = 'kernel_gateway.jupyter_websocket' | |
| ## Authorization token required for all requests (KG_AUTH_TOKEN env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.auth_token = '' | |
| ## The base path for mounting all API resources (KG_BASE_URL env var) | |
| # Default: '/' | |
| # c.KernelGatewayApp.base_url = '/' | |
| ## The full path to an SSL/TLS certificate file. (KG_CERTFILE env var) | |
| # Default: None | |
| # c.KernelGatewayApp.certfile = None | |
| ## The full path to a certificate authority certificate for SSL/TLS client | |
| # authentication. (KG_CLIENT_CA env var) | |
| # Default: None | |
| # c.KernelGatewayApp.client_ca = None | |
| ## Full path of a config file. | |
| # See also: JupyterApp.config_file | |
| # c.KernelGatewayApp.config_file = '' | |
| ## Specify a config file to load. | |
| # See also: JupyterApp.config_file_name | |
| # c.KernelGatewayApp.config_file_name = '' | |
| ## Default kernel name when spawning a kernel (KG_DEFAULT_KERNEL_NAME env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.default_kernel_name = '' | |
| ## Environment variables allowed to be inherited from the spawning process by the | |
| # kernel | |
| # Default: [] | |
| # Forward the home directory, Spark install location, Julia thread count and | |
| # Google credentials path from the gateway's own environment to each kernel. | |
| # NOTE(review): a variable is only inherited if it is actually set in the gateway | |
| # process's environment — confirm JULIA_NUM_THREADS is exported there. | |
| c.KernelGatewayApp.env_process_whitelist = ["HOME", "SPARK_HOME", "JULIA_NUM_THREADS", "GOOGLE_APPLICATION_CREDENTIALS"] | |
| ## Sets the Access-Control-Expose-Headers header. (KG_EXPOSE_HEADERS env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.expose_headers = '' | |
| ## Override any kernel name specified in a notebook or request | |
| # (KG_FORCE_KERNEL_NAME env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.force_kernel_name = '' | |
| ## Generate default config file. | |
| # See also: JupyterApp.generate_config | |
| # c.KernelGatewayApp.generate_config = False | |
| ## IP address on which to listen (KG_IP env var) | |
| # Default: '127.0.0.1' | |
| # Listen on all interfaces instead of loopback only. | |
| # NOTE(review): c.KernelGatewayApp.auth_token is left at its default ('') in this | |
| # file, so the gateway is presumably reachable without a token by anything that can | |
| # reach this port — confirm network restrictions or set KG_AUTH_TOKEN. | |
| c.KernelGatewayApp.ip = '0.0.0.0' | |
| ## The kernel manager class to use. | |
| # Default: 'kernel_gateway.services.kernels.manager.SeedingMappingKernelManager' | |
| # c.KernelGatewayApp.kernel_manager_class = 'kernel_gateway.services.kernels.manager.SeedingMappingKernelManager' | |
| ## The kernel spec manager class to use. Should be a subclass of | |
| # `jupyter_client.kernelspec.KernelSpecManager`. | |
| # Default: 'jupyter_client.kernelspec.KernelSpecManager' | |
| # c.KernelGatewayApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager' | |
| ## The full path to a private key file for usage with SSL/TLS. (KG_KEYFILE env | |
| # var) | |
| # Default: None | |
| # c.KernelGatewayApp.keyfile = None | |
| ## The date format used by logging formatters for %(asctime)s | |
| # See also: Application.log_datefmt | |
| # c.KernelGatewayApp.log_datefmt = '%Y-%m-%d %H:%M:%S' | |
| ## The Logging format template | |
| # See also: Application.log_format | |
| # c.KernelGatewayApp.log_format = '[%(name)s]%(highlevel)s %(message)s' | |
| ## Set the log level by value or name. | |
| # See also: Application.log_level | |
| # c.KernelGatewayApp.log_level = 30 | |
| ## Sets the Access-Control-Max-Age header. (KG_MAX_AGE env var) | |
| # Default: '' | |
| # c.KernelGatewayApp.max_age = '' | |
| ## Limits the number of kernel instances allowed to run by this gateway. | |
| # Unbounded by default. (KG_MAX_KERNELS env var) | |
| # Default: None | |
| # c.KernelGatewayApp.max_kernels = None | |
| ## Port on which to listen (KG_PORT env var) | |
| # Default: 8888 | |
| # Overrides the default port; 8890 is one of the ports listed in the Dockerfile's | |
| # EXPOSE instruction. | |
| c.KernelGatewayApp.port = 8890 | |
| ## Number of ports to try if the specified port is not available (KG_PORT_RETRIES | |
| # env var) | |
| # Default: 50 | |
| # c.KernelGatewayApp.port_retries = 50 | |
| ## Number of kernels to prespawn using the default language. No prespawn by | |
| # default. (KG_PRESPAWN_COUNT env var) | |
| # Default: None | |
| # c.KernelGatewayApp.prespawn_count = None | |
| ## Runs the notebook (.ipynb) at the given URI on every kernel launched. No seed | |
| # by default. (KG_SEED_URI env var) | |
| # Default: None | |
| # c.KernelGatewayApp.seed_uri = None | |
| ## Instead of starting the Application, dump configuration to stdout | |
| # See also: Application.show_config | |
| # c.KernelGatewayApp.show_config = False | |
| ## Instead of starting the Application, dump configuration to stdout (as JSON) | |
| # See also: Application.show_config_json | |
| # c.KernelGatewayApp.show_config_json = False | |
| ## Sets the SSL version to use for the web socket connection. (KG_SSL_VERSION env | |
| # var) | |
| # Default: None | |
| # c.KernelGatewayApp.ssl_version = None | |
| ## Use x-* header values for overriding the remote-ip, useful when application is | |
| # behind a proxy. (KG_TRUST_XHEADERS env var) | |
| # Default: False | |
| # c.KernelGatewayApp.trust_xheaders = False | |
| #------------------------------------------------------------------------------ | |
| # NotebookHTTPPersonality(LoggingConfigurable) configuration | |
| #------------------------------------------------------------------------------ | |
| ## Personality for notebook-http support, creating REST endpoints based on the | |
| # notebook's annotated cells | |
| ## Optional API to download the notebook source code in notebook-http mode, | |
| # defaults to not allow | |
| # Default: False | |
| # c.NotebookHTTPPersonality.allow_notebook_download = False | |
| ## Determines which module is used to parse the notebook for endpoints and | |
| # documentation. Valid module names include | |
| # 'kernel_gateway.notebook_http.cell.parser' and | |
| # 'kernel_gateway.notebook_http.swagger.parser'. (KG_CELL_PARSER env var) | |
| # Default: 'kernel_gateway.notebook_http.cell.parser' | |
| # c.NotebookHTTPPersonality.cell_parser = 'kernel_gateway.notebook_http.cell.parser' | |
| ## Maps kernel language to code comment syntax | |
| # Default: {'scala': '//', None: '#'} | |
| # c.NotebookHTTPPersonality.comment_prefix = {'scala': '//', None: '#'} | |
| ## Serve static files on disk in the given path as /public, defaults to not serve | |
| # Default: None | |
| # c.NotebookHTTPPersonality.static_path = None | |
| #------------------------------------------------------------------------------ | |
| # JupyterWebsocketPersonality(LoggingConfigurable) configuration | |
| #------------------------------------------------------------------------------ | |
| ## Personality for standard websocket functionality, registering endpoints that | |
| # are part of the Jupyter Kernel Gateway API | |
| ## Environment variables allowed to be set when a client requests a new kernel | |
| # Default: [] | |
| # c.JupyterWebsocketPersonality.env_whitelist = [] | |
| ## Permits listing of the running kernels using API endpoints /api/kernels and | |
| # /api/sessions (KG_LIST_KERNELS env var). Note: Jupyter Notebook allows this by | |
| # default but kernel gateway does not. | |
| # Default: False | |
| # c.JupyterWebsocketPersonality.list_kernels = False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # | |
| # Licensed to the Apache Software Foundation (ASF) under one or more | |
| # contributor license agreements. See the NOTICE file distributed with | |
| # this work for additional information regarding copyright ownership. | |
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |
| # (the "License"); you may not use this file except in compliance with | |
| # the License. You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| # Default system properties included when running spark-submit. | |
| # This is useful for setting default environmental settings. | |
| # Example: | |
| # spark.master spark://master:7077 | |
| # spark.eventLog.enabled true | |
| # spark.eventLog.dir hdfs://namenode:8021/directory | |
| # spark.serializer org.apache.spark.serializer.KryoSerializer | |
| # spark.driver.memory 5g | |
| # Extra JVM option applied to the driver by default. | |
| # NOTE(review): presumably needed for Netty/Arrow reflective access on Java 11 | |
| # (the image installs openjdk-11-jre) — confirm. | |
| spark.driver.defaultJavaOptions -Dio.netty.tryReflectionSetAccessible=true | |
| # Upper bound on the total size of results returned to the driver. | |
| # NOTE(review): entrypoint.bash appends a spark.driver.memory value computed from | |
| # host RAM, which may be smaller than 50g — verify this limit suits the target hosts. | |
| spark.driver.maxResultSize 50g |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment