Created
November 12, 2024 18:15
-
-
Save lalitsingh24x7/63f1b60180e046994281d8a21ee99866 to your computer and use it in GitHub Desktop.
glue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys  # kept: present in original; commonly used with getResolvedOptions in Glue jobs

from awsglue.context import GlueContext
from pyspark.context import SparkContext
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql import functions as F

# AWS Glue ETL script: read a CSV from S3, aggregate transaction amounts
# per date, and write the result back to S3 partitioned by date.

# Initialize the Spark and Glue contexts.
sc = SparkContext()
glueContext = GlueContext(sc)

# Read the CSV file from S3. withHeader=True treats the first row as column
# names, so "date" and "transaction_amount" become addressable columns.
input_path = "s3://your-bucket/your-file.csv"
dynamic_frame = glueContext.create_dynamic_frame.from_options(
    format="csv",
    connection_type="s3",
    connection_options={"paths": [input_path]},
    format_options={"withHeader": True},
)

# Convert DynamicFrame to a Spark DataFrame for SQL-style transformations.
df = dynamic_frame.toDF()

# Aggregate per date. BUGFIX: the original called the bare builtin
# `sum("transaction_amount")`, which raises TypeError on a string — the
# Spark aggregate function F.sum (pyspark.sql.functions.sum) is required.
grouped_df = df.groupBy("date").agg(
    F.sum("transaction_amount").alias("total_amount")
)

# Write the aggregated data to S3, one output directory per date partition,
# with a header row in each CSV part file.
output_path = "s3://your-bucket/output/"
grouped_df.write.partitionBy("date").csv(output_path, header=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment