Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save AntreasAntoniou/dd89f74b83a307eab41444e5900e58ed to your computer and use it in GitHub Desktop.

Select an option

Save AntreasAntoniou/dd89f74b83a307eab41444e5900e58ed to your computer and use it in GitHub Desktop.
import pyarrow.parquet as pq
import pyarrow as pa
import pyarrow.dataset as ds
import numpy as np
import pandas as pd
import pathlib
from rich import print
import tqdm
# Grow "test.parquet" one row at a time, showing progress with tqdm.
#
# Every iteration reads the whole file back, concatenates a single new row and
# rewrites the file, so the work per iteration grows with the number of rows
# already stored.
# NOTE(review): this read-concat-rewrite pattern looks like the thing being
# measured, so it is kept as is -- confirm before swapping in a streaming
# writer.
total_iters = 10000
parquet_file = "test.parquet"
parquet_path = pathlib.Path(parquet_file)

# Start from a clean file so rows left over from a previous run are not
# carried into this one. This only needs to happen once, so it lives outside
# the loop instead of being re-checked on every iteration.
if parquet_path.exists():
    parquet_path.unlink()

with tqdm.tqdm(total=total_iters) as pbar:
    for i in range(total_iters):
        # One-row table holding the 2nd, 3rd and 4th powers of the index.
        table_entry = pa.table(
            [[i**2], [i**3], [i**4]], names=["square", "cube", "fourth_power"]
        )

        if parquet_path.exists():
            # Load everything written so far and append the new row.
            table = pa.concat_tables([pq.read_table(parquet_file), table_entry])
        else:
            # First iteration: nothing on disk yet.
            table = table_entry

        pq.write_table(table, parquet_file)

        pbar.update(1)
        pbar.set_description(f"{len(table)} entries")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment