Created
August 5, 2025 14:50
-
-
Save rok/042d59b2fbf81034a63cb815e17d6daf to your computer and use it in GitHub Desktop.
Create a uniformly encrypted Parquet file with PyArrow and attempt to read it with DuckDB.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "3ac10ec1-5775-4abd-ab9f-d94cde3c95f0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "pyarrow 21.0.0\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import pyarrow.parquet.encryption as pe\n", | |
| "import pyarrow.parquet as pq\n", | |
| "import pyarrow as pa\n", | |
| "from pyarrow.tests.parquet.encryption import InMemoryKmsClient\n", | |
| "print(\"pyarrow\", pa.__version__)\n", | |
| "\n", | |
| "def kms_factory(kms_connection_configuration):\n", | |
| " return InMemoryKmsClient(kms_connection_configuration)\n", | |
| "\n", | |
| "kms_connection_config = pe.KmsConnectionConfig(\n", | |
| " custom_kms_conf = {\"kf\": \"0123456789112345\"}\n", | |
| ")\n", | |
| "crypto_factory = pe.CryptoFactory(kms_factory)\n", | |
| "\n", | |
| "encryption_config = pe.EncryptionConfiguration(\n", | |
| " footer_key=\"kf\",\n", | |
| " encryption_algorithm=\"AES_GCM_V1\",\n", | |
| " uniform_encryption=True,\n", | |
| ")\n", | |
| "encryption_properties = crypto_factory.file_encryption_properties(kms_connection_config, encryption_config)\n", | |
| "\n", | |
| "table = pa.table({\"letter\": [\"a\", \"b\", \"c\"]})\n", | |
| "\n", | |
| "with pq.ParquetWriter(\"encrypted.parquet\", table.schema, encryption_properties=encryption_properties) as writer:\n", | |
| " writer.write_table(table) " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "ebd568f3-2c8c-4b29-9f6b-c124cdb25a9a", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "pyarrow 21.0.0\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "pyarrow.Table\n", | |
| "letter: string\n", | |
| "----\n", | |
| "letter: [[\"a\",\"b\",\"c\"]]" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import pyarrow.parquet.encryption as pe\n", | |
| "import pyarrow.parquet as pq\n", | |
| "from pyarrow.tests.parquet.encryption import InMemoryKmsClient\n", | |
| "print(\"pyarrow\", pa.__version__)\n", | |
| "\n", | |
| "def kms_factory(kms_connection_configuration):\n", | |
| " return InMemoryKmsClient(kms_connection_configuration)\n", | |
| "\n", | |
| "kms_connection_config = pe.KmsConnectionConfig(\n", | |
| " custom_kms_conf = {\"kf\": \"0123456789112345\"}\n", | |
| ")\n", | |
| "crypto_factory = pe.CryptoFactory(kms_factory)\n", | |
| "\n", | |
| "decryption_properties = crypto_factory.file_decryption_properties(kms_connection_config)\n", | |
| "\n", | |
| "result = pq.ParquetFile(\"encrypted.parquet\", decryption_properties=decryption_properties)\n", | |
| "result.read()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "f6e1bd6b-bc30-422a-b6b7-140459b1534e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "duckdb 1.3.2\n" | |
| ] | |
| }, | |
| { | |
| "ename": "InvalidInputException", | |
| "evalue": "Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key?", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mInvalidInputException\u001b[0m Traceback (most recent call last)", | |
| "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mduckdb\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mduckdb\u001b[39m\u001b[38;5;124m\"\u001b[39m, duckdb\u001b[38;5;241m.\u001b[39m__version__)\n\u001b[0;32m----> 4\u001b[0m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43m PRAGMA add_parquet_key(\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mkf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m0123456789112345\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m SELECT *\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124;43m FROM read_parquet(\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mencrypted.parquet\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, encryption_config = \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\u001b[38;5;124;43mfooter_key: \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mkf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m});\u001b[39;49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n", | |
| "\u001b[0;31mInvalidInputException\u001b[0m: Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key?" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import duckdb\n", | |
| "print(\"duckdb\", duckdb.__version__)\n", | |
| "\n", | |
| "duckdb.sql(\"\"\"\n", | |
| " PRAGMA add_parquet_key('kf', '0123456789112345');\n", | |
| " SELECT *\n", | |
| " FROM read_parquet('encrypted.parquet', encryption_config = {footer_key: 'kf'});\n", | |
| "\"\"\")" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.10.18" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment