Skip to content

Instantly share code, notes, and snippets.

@VibhuJawa
Created October 3, 2019 02:43
Show Gist options
  • Select an option

  • Save VibhuJawa/69a7831a2231abb56db8a0f33983b923 to your computer and use it in GitHub Desktop.

Select an option

Save VibhuJawa/69a7831a2231abb56db8a0f33983b923 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import xgboost \n",
"import dask_cudf\n",
"from dask.distributed import Client, wait\n",
"import cudf\n",
"import dask"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://172.17.0.3:8786</li>\n",
" <li><b>Dashboard: </b><a href='http://172.17.0.3:8787/status' target='_blank'>http://172.17.0.3:8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>8</li>\n",
" <li><b>Cores: </b>8</li>\n",
" <li><b>Memory: </b>0 B</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://172.17.0.3:8786' processes=0 threads=0, memory=0 B>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Address of the already-running dask scheduler this notebook attaches to.\n",
"SCHEDULER_ADDRESS = '172.17.0.3:8786'\n",
"\n",
"client = Client(SCHEDULER_ADDRESS)\n",
"# Restart the cluster through the client before running the example.\n",
"client.restart()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DoneAndNotDoneFutures(done={<Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 4)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 2)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 7)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 5)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 3)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 1)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 0)>, <Future: status: finished, type: DataFrame, key: ('from_pandas-556f0cad77a6f90e109b63f7cf36b5c0', 6)>}, not_done=set())"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tiny in-memory frame: feature column 'x' and label column 'y', 32 rows each.\n",
"sample_gdf = cudf.DataFrame({'x':[1,2]*16,'y':[0,1]*16})\n",
"\n",
"# Split it into 8 partitions and persist them before waiting on the result.\n",
"df = dask_cudf.from_cudf(sample_gdf,npartitions=8).persist()\n",
"wait(df)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><strong>Dask DataFrame Structure:</strong></div>\n",
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" </tr>\n",
" <tr>\n",
" <th>npartitions=8</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>int64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
"<div>Dask Name: getitem, 16 tasks</div>"
],
"text/plain": [
"<dask_cudf.DataFrame | 16 tasks | 8 npartitions>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['x']]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def create_dmatrix_from_cudf(df,non_label_columns, label_columns = ['label']):\n",
"    \"\"\"\n",
"    Build one lazy xgboost.DMatrix per partition of a dask dataframe.\n",
"\n",
"    Each partition returned by ``df.to_delayed()`` is split into a feature\n",
"    frame and a label frame, both halves are persisted, and a delayed\n",
"    ``xgboost.DMatrix(data=..., label=...)`` call is created for every pair.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : object exposing ``to_delayed()``\n",
"        In this notebook, a dask_cudf DataFrame.\n",
"    non_label_columns : list\n",
"        Column names selected from every partition as the feature data.\n",
"    label_columns : list, default ['label']\n",
"        Column names selected from every partition as the label data.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list\n",
"        One dask Delayed per partition, each wrapping an xgboost.DMatrix call.\n",
"    \"\"\"\n",
"    partitions = df.to_delayed()\n",
"    gpu_dfs = [(gpu_df[non_label_columns], gpu_df[label_columns]) for gpu_df in partitions]\n",
"    # Call the real Delayed.persist(): a misspelled attribute on a Delayed is\n",
"    # recorded lazily and only surfaces as an errored future at compute time.\n",
"    split_gpu_dfs = [(features.persist(), labels.persist()) for features, labels in gpu_dfs]\n",
"    # xgboost.DMatrix takes the feature frame through the `data` keyword.\n",
"    dmat_ls = [dask.delayed(xgboost.DMatrix)(data = features, label = labels) for features, labels in split_gpu_dfs]\n",
"    # NOTE(review): the DMatrix objects stay lazy here; wait() presumably only\n",
"    # blocks on the persisted partitions they depend on - confirm.\n",
"    wait(dmat_ls)\n",
"    return dmat_ls"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"dmat = create_dmatrix_from_cudf(df,non_label_columns = ['x'], \n",
" label_columns = ['y'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Delayed('DMatrix-32259e90-e0d8-4334-ace6-966265830639'),\n",
" Delayed('DMatrix-6ce18521-3507-4ab0-b5ed-494b3b9f6ce4'),\n",
" Delayed('DMatrix-00b14db9-a81d-4962-ba62-a93340a3650f'),\n",
" Delayed('DMatrix-4ab74b1c-11d6-4577-b02c-fde4d48e79cd'),\n",
" Delayed('DMatrix-e2841a80-0868-46ee-ae8e-52b6c75f49e5'),\n",
" Delayed('DMatrix-ce9b5442-6520-46e6-b3d4-a6f4b211be9b'),\n",
" Delayed('DMatrix-40c4c115-a647-4b53-be4d-f92e1c26364c'),\n",
" Delayed('DMatrix-f2844833-d744-4963-83a0-fc521f04df8e')]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dmat"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<b>Future: perist</b> <font color=\"gray\">status: </font><font color=\"red\">error</font>, <font color=\"gray\">key: </font>perist-80d545f6-5d73-403e-81cd-bf1fe0d04a86"
],
"text/plain": [
"<Future: status: error, key: perist-80d545f6-5d73-403e-81cd-bf1fe0d04a86>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.compute(dmat[0][0])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment