wesm/CInterfaceExample.ipynb

## CInterfaceExample.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import pyarrow as pa\n",
    "\n",
    "from pyarrow.cffi import ffi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pyarrow.RecordBatch\n",
       "a: int64\n",
       "b: string"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'a': [1, 2, 3, 4, 5],\n",
    "                   'b': ['a', 'b', 'c', 'd', 'e']})\n",
    "\n",
    "rb = pa.record_batch(df)\n",
    "rb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export pyarrow.RecordBatch to C Interface"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "c_schema = ffi.new(\"struct ArrowSchema*\")\n",
    "c_schema_ptr = int(ffi.cast(\"uintptr_t\", c_schema))\n",
    "\n",
    "# NB: RecordBatch is packed as a StructArray\n",
    "c_batch = arrow_c.new(\"struct ArrowArray*\")\n",
    "c_batch_ptr = int(ffi.cast(\"uintptr_t\", c_batch))\n",
    "\n",
    "rb.schema._export_to_c(c_schema_ptr)\n",
    "rb._export_to_c(c_batch_ptr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import pyarrow.RecordBatch given addresses of ArrowSchema, ArrowArray"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Deserialize schema\n",
    "schema2 = pa.Schema._import_from_c(c_schema_ptr)\n",
    "\n",
    "# Deserialize batch\n",
    "rb2 = pa.RecordBatch._import_from_c(c_batch_ptr, schema2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rb.equals(rb2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>e</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  1  a\n",
       "1  2  b\n",
       "2  3  c\n",
       "3  4  d\n",
       "4  5  e"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rb2.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import pyarrow as pa\n",
	"\n",
	"from pyarrow.cffi import ffi"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"pyarrow.RecordBatch\n",
	"a: int64\n",
	"b: string"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df = pd.DataFrame({'a': [1, 2, 3, 4, 5],\n",
	" 'b': ['a', 'b', 'c', 'd', 'e']})\n",
	"\n",
	"rb = pa.record_batch(df)\n",
	"rb"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Export pyarrow.RecordBatch to C Interface"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [],
	"source": [
	"c_schema = ffi.new(\"struct ArrowSchema*\")\n",
	"c_schema_ptr = int(ffi.cast(\"uintptr_t\", c_schema))\n",
	"\n",
	"# NB: RecordBatch is packed as a StructArray\n",
	"c_batch = arrow_c.new(\"struct ArrowArray*\")\n",
	"c_batch_ptr = int(ffi.cast(\"uintptr_t\", c_batch))\n",
	"\n",
	"rb.schema._export_to_c(c_schema_ptr)\n",
	"rb._export_to_c(c_batch_ptr)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Import pyarrow.RecordBatch given addresses of ArrowSchema, ArrowArray"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Deserialize schema\n",
	"schema2 = pa.Schema._import_from_c(c_schema_ptr)\n",
	"\n",
	"# Deserialize batch\n",
	"rb2 = pa.RecordBatch._import_from_c(c_batch_ptr, schema2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"rb.equals(rb2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>a</th>\n",
	" <th>b</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>a</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>b</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>3</td>\n",
	" <td>c</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>4</td>\n",
	" <td>d</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>5</td>\n",
	" <td>e</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" a b\n",
	"0 1 a\n",
	"1 2 b\n",
	"2 3 c\n",
	"3 4 d\n",
	"4 5 e"
	]
	},
	"execution_count": 25,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"rb2.to_pandas()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
No results found