###
### This gist contains 2 files: settings.json and lambda_function.py
###

### settings.json
{
    "extensions" : ["*.hdr", "*.glb", "*.wasm"]
}
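The entries are fnmatch-style glob patterns matched against the full S3 object key (see the matching loop in lambda_function.py below). A small illustration, with made-up object keys:

    import fnmatch

    # Each settings.json entry is a glob pattern tested against the whole object key
    fnmatch.fnmatch("models/scene.glb", "*.glb")    # True  -> candidate for compression
    fnmatch.fnmatch("textures/env.hdr", "*.hdr")    # True
    fnmatch.fnmatch("images/photo.jpg", "*.glb")    # False -> skipped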
### lambda_function.py
'''
This script converts an uncompressed S3 file into a gzip-compressed file. The file is replaced in place: the original is deleted and replaced by the gzipped version.

Create a role with S3 (read/write) and CloudWatch Logs access:
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogStream",
                "s3:*",
                "logs:PutLogEvents"
            ],
            "Resource": "*"
        },
        {
            "Sid": "VisualEditor1",
            "Effect": "Allow",
            "Action": "logs:CreateLogGroup",
            "Resource": "*"
        }
    ]
}

Install the Lambda in the region of the bucket, Python 2.7, 1 minute max execution time.
Edit "settings.json" to add or remove the extensions you want to compress.
The trigger is an S3 PUT event (select the bucket the Lambda applies to); output goes to S3 and CloudWatch Logs.

How it works:
- on each PUT event (a new file is uploaded to the bucket), an event is sent to the Lambda function (note: it does not work with a multipart upload)
- the Lambda wakes up and analyzes the incoming file
- read the metadata of the incoming file
- if the file already has the "gzip" ContentEncoding HTTP header, it means it is already compressed, so there is no need to recompress it
- if the file is too small (hard-coded: 1024 bytes): no compression
- if the file does not have a recognized extension (see settings.json): no compression
- if the file passes all the previous checks, it is downloaded locally (to /tmp)
- gzip the local version using the local OS "gzip" tool (could be improved by using the built-in Python gzip module - TODO)
- overwrite the file in the bucket with the locally gzipped version
- update the metadata: previous metadata plus ContentEncoding set to "gzip"
- delete the local gzipped version
'''
import json
import pprint
import boto3
import botocore
import tempfile
import os
import subprocess
import fnmatch

def lambda_handler(event, context):
    with open("settings.json") as json_data:
        settings = json.load(json_data)
    # print "EVENT :"
    client = boto3.client('s3')
    s3 = boto3.resource('s3')
    for r in event.get('Records'):
        # pprint.pprint(r)
        bucketName = r.get('s3').get('bucket').get('name')
        objectKey = r.get('s3').get('object').get('key')
        etag = r.get('s3').get('object').get('eTag')
        print "Retrieving object :"
        print "  bucketName = " + bucketName
        print "  objectKey = " + objectKey
        uploadedMeta = client.head_object(Bucket=bucketName, Key=objectKey, IfMatch=etag)
        contentEncoding = uploadedMeta.get('ContentEncoding', None)
        size = uploadedMeta.get('ContentLength', 0)
        print "  Current encoding = " + str(contentEncoding)
        print "  Size = " + str(size)
        if (contentEncoding == 'gzip'):
            print(" ==> File is already compressed")
            return True
        match = False
        for ext in settings['extensions']:
            if fnmatch.fnmatch(objectKey, ext):
                match = True
                break
        if (match == False):
            print(" ==> File extension is not activated for compression. See settings.json")
            return True
        if (size < 1024):
            print(" ==> File is too small to be compressed")
            return True
        tmp_in = tempfile.mkdtemp() + '.orig'
        tmp_out = tmp_in + '.gz'  # must be .gz because that is the file gzip creates by default
        new_objectKey = objectKey + '.gz'  # name in S3
        print("Download content to " + tmp_in + " and gzip it to " + tmp_out)
        s3.Bucket(bucketName).download_file(objectKey, tmp_in)
        print("GZipping file")
        print subprocess.check_output(['gzip', '-v', '-f', '-9', tmp_in])  # the gzip command creates the .gz file
        statinfo = os.stat(tmp_out)
        newsize = statinfo.st_size
        print "New gzipped file size = " + str(statinfo.st_size)
        if (size - newsize < 1024):
            print "Compression is not efficient, keep original file"
            return True
        print "Overwriting S3 file with gzipped version"
        # Recreate metadata from the original file (including HTTP headers)
        # Todo : keep original upload date
        extraArgs = {
            'ContentEncoding': "gzip"
        }
        for m in ['Metadata', 'CacheControl', 'ContentDisposition', 'ContentLanguage', 'ContentType', 'Expires']:
            if (uploadedMeta.get(m, None) != None):
                extraArgs[m] = uploadedMeta.get(m)
        extraArgs['Metadata']['lambda'] = os.environ.get('AWS_LAMBDA_FUNCTION_NAME', '')
        extraArgs['Metadata']['original-size'] = str(size)
        s3.Object(bucketName, objectKey).upload_file(
            Filename=tmp_out,
            ExtraArgs=extraArgs)
        # remove local file
        os.remove(tmp_out)
    return {
        'statusCode': 200,
        'body': 'It works'
    }
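To exercise the handler locally, a minimal hand-built S3 PUT event is enough. Only the fields the handler actually reads are shown; the bucket, key, and eTag below are placeholders and must refer to a real object for head_object/download_file to succeed:

    # Minimal hand-built S3 PUT event for a local test (placeholders: replace with a real bucket/object)
    fake_event = {
        'Records': [{
            's3': {
                'bucket': {'name': 'my-test-bucket'},
                'object': {'key': 'models/scene.glb', 'eTag': 'd41d8cd98f00b204e9800998ecf8427e'}
            }
        }]
    }
    # lambda_handler(fake_event, None)  # needs AWS credentials and the object to exist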
not able to see settings.json
Hi - I am getting the error below at the actual gzip step. Can you please help with what could be wrong? Should I do any additional imports?
print(subprocess.check_output(['gzip', '-v', '-f', '-9', tmp_in]))
Error log ->
[ERROR] FileNotFoundError: [Errno 2] No such file or directory: 'gzip'
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 42, in lambda_handler
print(subprocess.check_output(['gzip', '-v', '-f', '-9', tmp_in])) # gzip command create .gz file
File "/var/lang/lib/python3.8/subprocess.py", line 411, in check_output
return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
File "/var/lang/lib/python3.8/subprocess.py", line 489, in run
with Popen(*popenargs, **kwargs) as process:
File "/var/lang/lib/python3.8/subprocess.py", line 854, in init
self._execute_child(args, executable, preexec_fn, close_fds,
File "/var/lang/lib/python3.8/subprocess.py", line 1702, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
[ERROR] FileNotFoundError: [Errno 2] No such file or directory: 'gzip'
It means there is no gzip executable in the path. Maybe AWS changed something in the underlying Linux behind Lambda. Try to find where the gzip program is located (/bin/gzip or similar).
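Another option, which would also cover the TODO in the docstring, is to skip the external binary entirely and use Python's built-in gzip module. A rough, untested sketch of a replacement for the check_output call, keeping the same tmp_in/tmp_out naming:

    import gzip
    import os
    import shutil

    def gzip_file(tmp_in, tmp_out):
        # Compress tmp_in into tmp_out with the stdlib instead of the external gzip binary
        with open(tmp_in, 'rb') as f_in:
            with gzip.open(tmp_out, 'wb', compresslevel=9) as f_out:
                shutil.copyfileobj(f_in, f_out)
        os.remove(tmp_in)  # mimic 'gzip -f', which removes the original after compressing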
Suggest using tmp.close()  # deletes temp file
and using the delete flag when creating temp files: tempfile.NamedTemporaryFile(delete=True)
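For example, just a sketch of that pattern (the download/compress/upload steps are elided):

    import tempfile

    # delete=True (the default) removes the file automatically when it is closed,
    # so the explicit os.remove() at the end of the handler is no longer needed
    with tempfile.NamedTemporaryFile(suffix='.orig', delete=True) as tmp:
        # ... download the S3 object to tmp.name, compress it, upload the result ...
        pass
    # the temp file is already gone here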
@bhimpel-mediware : what does the Lambda log say? Is the huge JS file detected when dropped? Maybe with huge files, S3 uploads the file block-by-block (multipart), and the PUT event is not triggered.
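If that turns out to be the cause, multipart uploads fire the s3:ObjectCreated:CompleteMultipartUpload event rather than s3:ObjectCreated:Put, so subscribing the function to s3:ObjectCreated:* should catch both. A sketch with boto3 (bucket name and function ARN are placeholders):

    import boto3

    s3 = boto3.client('s3')
    # Subscribe the Lambda to every ObjectCreated variant (Put, Post, Copy, CompleteMultipartUpload)
    s3.put_bucket_notification_configuration(
        Bucket='my-test-bucket',
        NotificationConfiguration={
            'LambdaFunctionConfigurations': [{
                'LambdaFunctionArn': 'arn:aws:lambda:eu-west-1:123456789012:function:gzip-s3-object',
                'Events': ['s3:ObjectCreated:*']
            }]
        }
    )

Note that S3 must also be allowed to invoke the function; the console sets that permission up automatically when the trigger is added there.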