Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save rok/042d59b2fbf81034a63cb815e17d6daf to your computer and use it in GitHub Desktop.

Select an option

Save rok/042d59b2fbf81034a63cb815e17d6daf to your computer and use it in GitHub Desktop.
Create a uniformly encrypted Parquet file with PyArrow and attempt to read it with DuckDB
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3ac10ec1-5775-4abd-ab9f-d94cde3c95f0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pyarrow 21.0.0\n"
]
}
],
"source": [
"import pyarrow as pa\n",
"import pyarrow.parquet as pq\n",
"import pyarrow.parquet.encryption as pe\n",
"from pyarrow.tests.parquet.encryption import InMemoryKmsClient\n",
"\n",
"print(f\"pyarrow {pa.__version__}\")\n",
"\n",
"\n",
"def kms_factory(kms_connection_configuration):\n",
"    \"\"\"Return an InMemoryKmsClient built from the given KMS connection configuration.\"\"\"\n",
"    return InMemoryKmsClient(kms_connection_configuration)\n",
"\n",
"\n",
"# Data to write: one string column named \"letter\".\n",
"table = pa.table({\"letter\": [\"a\", \"b\", \"c\"]})\n",
"\n",
"# KMS setup: the custom configuration holds a single entry under the id \"kf\",\n",
"# which is the same id passed as footer_key below.\n",
"kms_connection_config = pe.KmsConnectionConfig(\n",
"    custom_kms_conf={\"kf\": \"0123456789112345\"},\n",
")\n",
"crypto_factory = pe.CryptoFactory(kms_factory)\n",
"\n",
"# Encryption settings: footer key \"kf\", AES_GCM_V1, uniform encryption switched on.\n",
"encryption_config = pe.EncryptionConfiguration(\n",
"    footer_key=\"kf\",\n",
"    encryption_algorithm=\"AES_GCM_V1\",\n",
"    uniform_encryption=True,\n",
")\n",
"encryption_properties = crypto_factory.file_encryption_properties(\n",
"    kms_connection_config,\n",
"    encryption_config,\n",
")\n",
"\n",
"# Write the table to encrypted.parquet; the context manager closes the writer.\n",
"with pq.ParquetWriter(\n",
"    \"encrypted.parquet\",\n",
"    table.schema,\n",
"    encryption_properties=encryption_properties,\n",
") as parquet_writer:\n",
"    parquet_writer.write_table(table)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ebd568f3-2c8c-4b29-9f6b-c124cdb25a9a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pyarrow 21.0.0\n"
]
},
{
"data": {
"text/plain": [
"pyarrow.Table\n",
"letter: string\n",
"----\n",
"letter: [[\"a\",\"b\",\"c\"]]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This cell repeats its own imports and KMS setup; `pa` is imported here too so\n",
"# that the version print below does not rely on a name bound by another cell.\n",
"import pyarrow as pa\n",
"import pyarrow.parquet as pq\n",
"import pyarrow.parquet.encryption as pe\n",
"from pyarrow.tests.parquet.encryption import InMemoryKmsClient\n",
"\n",
"print(\"pyarrow\", pa.__version__)\n",
"\n",
"\n",
"def kms_factory(kms_connection_configuration):\n",
"    \"\"\"Return an InMemoryKmsClient built from the given KMS connection configuration.\"\"\"\n",
"    return InMemoryKmsClient(kms_connection_configuration)\n",
"\n",
"\n",
"# KMS configuration with a single entry under the id \"kf\".\n",
"kms_connection_config = pe.KmsConnectionConfig(\n",
"    custom_kms_conf={\"kf\": \"0123456789112345\"}\n",
")\n",
"crypto_factory = pe.CryptoFactory(kms_factory)\n",
"\n",
"decryption_properties = crypto_factory.file_decryption_properties(kms_connection_config)\n",
"\n",
"# Open encrypted.parquet with the decryption properties and display its contents.\n",
"result = pq.ParquetFile(\"encrypted.parquet\", decryption_properties=decryption_properties)\n",
"result.read()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f6e1bd6b-bc30-422a-b6b7-140459b1534e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"duckdb 1.3.2\n"
]
},
{
"ename": "InvalidInputException",
"evalue": "Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key?",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mInvalidInputException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mduckdb\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mduckdb\u001b[39m\u001b[38;5;124m\"\u001b[39m, duckdb\u001b[38;5;241m.\u001b[39m__version__)\n\u001b[0;32m----> 4\u001b[0m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43m PRAGMA add_parquet_key(\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mkf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m0123456789112345\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m SELECT *\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43m FROM read_parquet(\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mencrypted.parquet\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, encryption_config = \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\u001b[38;5;124;43mfooter_key: \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mkf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m});\u001b[39;49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mInvalidInputException\u001b[0m: Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key?"
]
}
],
"source": [
"import duckdb\n",
"\n",
"print(f\"duckdb {duckdb.__version__}\")\n",
"\n",
"# Register the string '0123456789112345' with DuckDB under the name 'kf', then read\n",
"# encrypted.parquet using 'kf' as the footer key.\n",
"# NOTE(review): the file was produced through pyarrow's CryptoFactory/KMS layer, while\n",
"# this query hands DuckDB the raw key string; presumably the two key-handling schemes\n",
"# are not interoperable, which would explain an AES tag mismatch - confirm against the\n",
"# DuckDB Parquet encryption documentation.\n",
"parquet_query = \"\"\"\n",
" PRAGMA add_parquet_key('kf', '0123456789112345');\n",
" SELECT *\n",
" FROM read_parquet('encrypted.parquet', encryption_config = {footer_key: 'kf'});\n",
"\"\"\"\n",
"duckdb.sql(parquet_query)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment