Skip to content

Instantly share code, notes, and snippets.

@rayyildiz
Created September 15, 2024 10:04
Show Gist options
  • Select an option

  • Save rayyildiz/d1b5e908a068abe9890b9ede18aacfe5 to your computer and use it in GitHub Desktop.

Select an option

Save rayyildiz/d1b5e908a068abe9890b9ede18aacfe5 to your computer and use it in GitHub Desktop.
Merge files
import pandas as pd
from tqdm import tqdm
# Merge the "Profiles" and "Posts" sheets of several Excel workbooks into a
# single workbook with one combined sheet of each kind.

# Workbooks to merge; each one is read from ./data.
file_names = [
    "2e522b3e-c97c-472f-9c75-83dec7217079.xlsx",
    "2f6e37ac-9b52-4f86-a5dc-1b1cffdea581.xlsx",
    "4b0ece5b-9800-4544-8d0b-d1cffa069ad0.xlsx",
    "5bc0f302-d03e-444e-916a-61bb2bcde488.xlsx",
    "99dceb6d-2ba2-4944-a4e7-dbd7c2994fc4.xlsx",
    "8785f01a-193a-4bfc-928e-27bf9c674fcb.xlsx",
    "71386e43-0d98-4c22-b03c-ecbc41006d1d.xlsx",
    "b30b7d9c-95bb-4d85-8e04-1694bb60f442.xlsx",
    "b956fd31-d034-4a74-9f8e-16a31ba650e3.xlsx",
    "d2ddebe8-331a-4be5-a42a-cfca3372b090.xlsx",
    "dfcc9dea-9f0e-425e-9c36-eac2a6bfed5f.xlsx",
    "f7851b00-d7b4-442f-af11-547b1dea3bdd.xlsx",
    "ff12987c-c9c0-4ea8-82fc-cf5235dc385f.xlsx",
]
# Rows are merged in the reverse of the order listed above.
file_names.reverse()

# Collect the per-file frames in plain lists and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it inside the loop
# re-copied all previously accumulated rows on every iteration.
profile_frames = []
post_frames = []

for file_name in tqdm(file_names, desc="Processing files", unit="file"):
    # A list of sheet names makes read_excel return a dict keyed by sheet
    # name, so each workbook is parsed once instead of once per sheet.
    sheets = pd.read_excel(
        f"./data/{file_name}",
        engine="openpyxl",
        sheet_name=["Profiles", "Posts"],
    )
    profile_frames.append(sheets["Profiles"])
    post_frames.append(sheets["Posts"])

# pd.concat raises ValueError when given no objects, so fall back to an empty
# frame if file_names is ever emptied.
df_profiles = pd.concat(profile_frames) if profile_frames else pd.DataFrame()
df_posts = pd.concat(post_frames) if post_frames else pd.DataFrame()

# Write both combined tables to one workbook; index=False keeps the row
# index out of the output sheets.
with pd.ExcelWriter("./output/merged.xlsx") as writer:
    df_profiles.to_excel(writer, sheet_name="Profiles", index=False)
    df_posts.to_excel(writer, sheet_name="Posts", index=False)

print("Data processing complete. Profiles and Posts data have been combined and stored in DataFrames.")
pandas
tqdm
openpyxl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment