Find all .pdf links on a page and check which ones return a non-200 response.
$ poetry install
# Write a colour-coded diff of each row's 'original' vs 'edited' text into
# the third column (column index 2) of the worksheet.
# The row is offset by one, presumably to skip a header row; this assumes
# `df` carries a default 0..n-1 index -- TODO confirm.
for i, row in df.iterrows():
    worksheet.write_rich_string(i + 1, 2, *visualize_diff(row['original'], row['edited']))

# `ExcelWriter.save()` was deprecated in pandas 1.5 and removed in 2.0;
# `close()` writes the workbook to disk on both old and new versions.
excel_writer.close()
| import difflib | |
| def visualize_diff(a, b): | |
| seqm = difflib.SequenceMatcher(None, a, b) | |
| output= [] | |
| for opcode, a0, a1, b0, b1 in seqm.get_opcodes(): | |
| if opcode == 'equal': | |
| output.append(seqm.a[a0:a1]) | |
| elif opcode == 'insert': | |
| output.append(green) |
# Export the DataFrame to an .xlsx workbook through the XlsxWriter engine so
# that cell formats can be created on the workbook afterwards.
sheet_name = 'to_review'
excel_writer = pd.ExcelWriter("edited_questions.xlsx", engine='xlsxwriter')
df.to_excel(excel_writer, sheet_name=sheet_name, index=False)

# Handles to the engine-level workbook and the sheet that was just written.
workbook = excel_writer.book
worksheet = excel_writer.sheets[sheet_name]

# Font formats: plain green, plain red, and bold green.
green = workbook.add_format({'color': 'green'})
red = workbook.add_format({'color': 'red'})
b_green = workbook.add_format({'color': 'green', 'bold': True})
| import pandas as pd | |
| df = pd.DataFrame([ | |
| { | |
| 'original': "Can you tell us a bit more abt how scalable your solution is?", | |
| 'edited': "Can you tell us a bit more about how scalable your solution is?", | |
| }, | |
| { | |
| 'original': "What will our priorities be for the next quarter?", | |
| 'edited': "What will our priorities be for the next year?", | |
| }, |
| import xlsxwriter | |
| workbook = xlsxwriter.Workbook('rich_strings.xlsx') | |
| bold = workbook.add_format({'bold': True}) | |
| italic = workbook.add_format({'italic': True}) | |
| worksheet.write_rich_string('A1', | |
| 'This is ', | |
| bold, 'bold', |
import logging
import os
import json
import urllib3
import datetime

# Required setting: raises KeyError at import time when AIRFLOW_URL is unset.
AIRFLOW_URL = os.environ['AIRFLOW_URL']
DAG_ID = 'my_helpful_dag'
# Optional setting: falls back to 'info' and is upper-cased
# (presumably to match the `logging` level names -- TODO confirm).
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'info').upper()
import os
import site
from setuptools.command import easy_install

try:
    # Python 3: `reload` is no longer a builtin and lives in importlib.
    from importlib import reload
except ImportError:
    # Python 2: keep using the builtin `reload`.
    pass

# Target directory for the install; must be provided through the environment.
install_path = os.environ['GLUE_INSTALLATION']

# NOTE(review): easy_install is deprecated in setuptools; it is kept here
# because replacing it would change how the package gets installed.
easy_install.main(["--install-dir", install_path, "torch"])

# Re-run `site` after the install, presumably so the freshly installed
# package becomes importable in this process -- TODO confirm.
reload(site)

import torch
print(torch.__version__)
#!/usr/bin/env python3
"""Convert a CSV file to Parquet.

Usage: <script> SOURCE_CSV DEST_PARQUET
"""
import pandas as pd
import sys

# Fail with a readable message instead of a bare IndexError when the two
# path arguments are missing; extra trailing arguments are ignored.
if len(sys.argv) < 3:
    sys.exit("usage: {} SOURCE_CSV DEST_PARQUET".format(sys.argv[0]))

pd.read_csv(sys.argv[1]).to_parquet(sys.argv[2])
from airflow.models import DAG
from airflow.contrib.operators.aws_athena_operator import AWSAthenaOperator
from airflow.operators.s3_file_transform_operator import S3FileTransformOperator
from datetime import datetime


class XComEnabledAWSAthenaOperator(AWSAthenaOperator):
    """AWSAthenaOperator whose ``execute`` returns the query execution id."""

    def execute(self, context):
        """Run the parent operator's ``execute`` and return the execution id.

        :param context: passed through unchanged to the parent ``execute``.
        :return: ``self.query_execution_id`` as it stands after the parent
            call has finished.
        """
        super(XComEnabledAWSAthenaOperator, self).execute(context)
        # just so that this gets `xcom_push`(ed)
        return self.query_execution_id