First, a quick code example of K-Means in scikit-learn:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
n_centers = 5
X, _ = make_blobs(n_samples=10000, n_centers=n_centers)| from collections import namedtuple | |
| import vaex | |
| import time | |
| import orjson | |
| import os | |
| import psutil | |
| from pyarrow import flight | |
| import pyarrow as pa |
| from collections import namedtuple | |
| import vaex | |
| import time | |
| import orjson | |
| import os | |
| import psutil | |
| from pyarrow import flight | |
| import pyarrow as pa |
| wget https://download.dremio.com/odbc-driver/1.4.2.1003/dremio-odbc-1.4.2.1003-1.x86_64.rpm | |
| sudo apt-get install alien unixodbc-dev -y | |
| sudo alien dremio-odbc-1.4.2.1003-1.x86_64.rpm | |
| sudo dpkg -i dremio-odbc_1.4.2.1003-2_amd64.deb |
| #!/bin/bash | |
| #SBATCH --job-name spark-cluster | |
| #SBATCH --account=qh82 | |
| #SBATCH --time=02:00:00 | |
| # --- Master resources --- | |
| #SBATCH --nodes=1 | |
| #SBATCH --mem-per-cpu=1G | |
| #SBATCH --cpus-per-task=1 | |
| #SBATCH --ntasks-per-node=1 | |
| # --- Worker resources --- |
| #!/usr/bin/env bash | |
| set -eu | |
| PWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | |
| SRC_DIR=$(realpath "${PWD}/..") | |
| CXX_SRC=${SRC_DIR}/cpp | |
| # The following can be set | |
| : "${CMAKE:=cmake}" |
| #!/bin/bash | |
| ## This gist contains step-by-step instructions to install CUDA v9.0 and cuDNN 7.3 on Ubuntu 18.04 | |
| ### steps #### | |
| # verify the system has a CUDA-capable GPU | |
| # download and install the NVIDIA CUDA toolkit and cuDNN | |
| # set up environment variables | |
| # verify the installation | |
| ### |