Tanveer Ahmad tahashmi

## arrow_flight_dremio.py
from collections import namedtuple
import vaex
import time

import orjson

import os
import psutil
from pyarrow import flight
import pyarrow as pa

## arrow_flight_dremio.py
from collections import namedtuple
import vaex
import time

import orjson

import os
import psutil
from pyarrow import flight
import pyarrow as pa

## SLURMCluster_vs_Singularity.ipynb

      
              1 file
            
          
              3 forks
            
          
                21 comments
              
            
              11 stars
            
          
                willirath
                / SLURMCluster_vs_Singularity.ipynb
            
            
              Last active
              July 30, 2025 13:57
            
              
                Dask-Jobqueue SLURMCluster with Singularity
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## ubuntu-install-dremio.sh
# Download the Dremio ODBC driver, which is distributed as an RPM package.
wget https://download.dremio.com/odbc-driver/1.4.2.1003/dremio-odbc-1.4.2.1003-1.x86_64.rpm
# Install alien and the unixODBC development package.
sudo apt-get install alien unixodbc-dev -y
# Run alien on the downloaded RPM; the next step expects it to have produced a .deb.
sudo alien dremio-odbc-1.4.2.1003-1.x86_64.rpm
# Install the .deb. NOTE(review): the release suffix here is -2 while the RPM's is -1 —
# presumably alien bumps it during conversion; confirm the generated filename.
sudo dpkg -i dremio-odbc_1.4.2.1003-2_amd64.deb

## cuml-kmeans-mnmg-api.md

      
              1 file
            
          
              0 forks
            
          
                1 comment
              
            
              3 stars
            
          
                cjnolet
                / cuml-kmeans-mnmg-api.md
            
            
              Last active
              October 11, 2024 10:54
            
              
                Simple example of cuML's K-Means Single-GPU (SG) and Multi-Node Multi-GPU (MNMG) APIs compared to Scikit-learn and Dask-ML
              
          
    Comparing cuML K-Means API Against Scikit-learn & Dask-ML

First, a quick code example of K-Means in Scikit-learn
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

# Number of cluster centers to generate.
n_centers = 5

# make_blobs takes the number of centers through its `centers` keyword;
# it has no `n_centers` parameter, so passing one raises TypeError.
X, _ = make_blobs(n_samples=10000, centers=n_centers)

  
## run_spark_cluster.sh
#!/bin/bash
#SBATCH --job-name spark-cluster
#SBATCH --account=qh82
#SBATCH --time=02:00:00
# --- Master resources ---
#SBATCH --nodes=1
#SBATCH --mem-per-cpu=1G
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=1
# --- Worker resources ---

## arrow-build.sh
#!/usr/bin/env bash

set -eu

PWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
SRC_DIR=$(realpath "${PWD}/..")
CXX_SRC=${SRC_DIR}/cpp

# The following can be set
: "${CMAKE:=cmake}"

## cuda_installation_on_ubuntu_18.04
#!/bin/bash
## This gist contains step-by-step instructions to install CUDA v9.0 and cuDNN 7.3 on Ubuntu 18.04

### steps ####
# verify the system has a CUDA-capable GPU
# download and install the NVIDIA CUDA Toolkit and cuDNN
# set up environment variables
# verify the installation
###

## PySpark DataFrame from many small pandas DataFrames.ipynb

      
              2 files
            
          
              4 forks
            
          
                9 comments
              
            
              4 stars
            
          
                linar-jether
                / PySpark DataFrame from many small pandas DataFrames.ipynb
            
            
              Created
              July 8, 2018 10:15
            
              
                Convert a RDD of pandas DataFrames to a single Spark DataFrame using Arrow and without collecting all data in the driver.
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## dask-xgboost-airlines.ipynb

      
              1 file
            
          
              10 forks
            
          
                5 comments
              
            
              10 stars
            
          
                mrocklin
                / dask-xgboost-airlines.ipynb
            
            
              Created
              February 21, 2017 00:34
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	from collections import namedtuple
	import vaex
	import time

	import orjson

	import os
	import psutil
	from pyarrow import flight
	import pyarrow as pa
	wget https://download.dremio.com/odbc-driver/1.4.2.1003/dremio-odbc-1.4.2.1003-1.x86_64.rpm
	sudo apt-get install alien unixodbc-dev -y
	sudo alien dremio-odbc-1.4.2.1003-1.x86_64.rpm
	sudo dpkg -i dremio-odbc_1.4.2.1003-2_amd64.deb
	#!/bin/bash
	#SBATCH --job-name spark-cluster
	#SBATCH --account=qh82
	#SBATCH --time=02:00:00
	# --- Master resources ---
	#SBATCH --nodes=1
	#SBATCH --mem-per-cpu=1G
	#SBATCH --cpus-per-task=1
	#SBATCH --ntasks-per-node=1
	# --- Worker resources ---
	#!/usr/bin/env bash

	set -eu

	PWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
	SRC_DIR=$(realpath "${PWD}/..")
	CXX_SRC=${SRC_DIR}/cpp

	# The following can be set
	: "${CMAKE:=cmake}"
	#!/bin/bash
	## This gist contains step by step instructions to install cuda v9.0 and cudnn 7.3 in ubuntu 18.04

	### steps ####
	# verify the system has a cuda-capable gpu
	# download and install the nvidia cuda toolkit and cudnn
	# setup environmental variables
	# verify the installation
	###