Skip to content

Instantly share code, notes, and snippets.

@Birappa87
Created November 2, 2023 19:53
Show Gist options
  • Select an option

  • Save Birappa87/778146bfea21c6cf18e5ff89cb69c826 to your computer and use it in GitHub Desktop.

Select an option

Save Birappa87/778146bfea21c6cf18e5ff89cb69c826 to your computer and use it in GitHub Desktop.
def dump_data(df, choice):
    """Load the CSV export that matches ``choice`` into PostgreSQL.

    The CSV is read from the ``data`` directory (relative to the current
    working directory) and written with ``if_exists='replace'``, so the
    destination table is replaced on every call.

    Args:
        df: Accepted for backward compatibility. NOTE(review): the value
            passed in is not used; it is replaced by the contents of the
            CSV file, exactly as in the previous implementation.
        choice: ``'CreditSpreadFile'`` or ``'coveredCalls'``; any other
            value selects the short-put file and table.

    The connection settings (``user_name``, ``password``, ``host``, ``port``,
    ``db_name``) are read from names defined outside this function.
    """
    from pathlib import Path

    # choice -> (CSV file name inside ./data, destination table name)
    targets = {
        'CreditSpreadFile': ('credit_spread.csv', 'Optionsplay_Credit_spread'),
        'coveredCalls': ('covered_calls.csv', 'Optionsplay_Covered_calls'),
    }
    csv_name, table_name = targets.get(
        choice, ('shortput.csv', 'OptionsPlay_short_put')
    )

    # NOTE(review): credentials are interpolated verbatim; a password that
    # contains URL-special characters (e.g. '@') must already be URL-encoded.
    connection_string = f'postgresql://{user_name}:{password}@{host}:{port}/{db_name}'
    engine = create_engine(connection_string)
    try:
        # Path(...) / name avoids the backslash literals ('data\c...'), which
        # are invalid escape sequences and only resolve on Windows.
        df = pd.read_csv(Path('data') / csv_name)
        df.to_sql(table_name, engine, if_exists='replace', index=False)
    finally:
        # Release the engine's pooled connections even if the load fails.
        engine.dispose()
def parse_data(html, choice):
    """Extract the ``<table id="{choice}">`` element from ``html`` as a DataFrame.

    Args:
        html: HTML markup that contains the table.
        choice: ``id`` attribute of the table to extract.

    Returns:
        A DataFrame built from the table with its last row dropped and every
        string column truncated to 255 characters.

    Raises:
        IndexError: If ``html`` contains no table with id ``choice``.
    """
    from io import StringIO

    # Kept from the original: (re)install the Playwright browsers on each call.
    # Output is captured and the exit status is deliberately not checked.
    subprocess.run(
        ["playwright", "install"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )

    soup = BeautifulSoup(html, 'html.parser')

    matches = soup.select(f'table#{choice}')
    if not matches:
        # Same exception type as indexing an empty result, but with a message
        # that says what was actually missing.
        raise IndexError(f'no <table id="{choice}"> found in the supplied HTML')
    table = matches[0]

    # Diagnostic only: number of header cells (0 when the table has no <thead>).
    thead = table.find('thead')
    columns = thead.find_all('th') if thead is not None else []
    print(len(columns))

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(table)))[0]

    # Drop the last row (presumably a footer/summary row - confirm against the
    # page). copy() makes the result an independent frame so the column
    # assignments below do not trigger SettingWithCopyWarning.
    df = df.iloc[:-1, :].copy()

    # Cap text columns at 255 characters; is_string_dtype covers both the
    # classic object dtype and pandas' dedicated string dtypes.
    for col in df.columns:
        if pd.api.types.is_string_dtype(df[col].dtype):
            df[col] = df[col].str[:255]

    print(df.head())
    return df
def extract_data(url, choice, login='your_mailID', login_password='password',
                 wait_ms=30000):
    """Log in at ``url`` with headless Chromium and return a table wrapper's HTML.

    Args:
        url: Address of the login page.
        choice: ``'CreditSpreadFile'`` or ``'coveredCalls'``; any other value
            selects the short-puts wrapper.
        login: Value typed into the ``input#Login`` field.
        login_password: Value typed into the ``input#Password`` field.
        wait_ms: Fixed delay, in milliseconds, between submitting the form and
            reading the page.

    Returns:
        The inner HTML of the wrapper element selected by ``choice``.
    """
    # choice -> XPath of the element that wraps the corresponding table
    wrappers = {
        'CreditSpreadFile': '//*[@id="CreditSpreadFile_wrapper"]',
        'coveredCalls': '//*[@id="coveredCalls_wrapper"]',
    }
    selector = wrappers.get(choice, '//*[@id="shortPuts_wrapper"]')

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, slow_mo=50, chromium_sandbox=False)
        try:
            page = browser.new_page()
            page.goto(url=url)
            page.fill('input#Login', login)
            page.fill('input#Password', login_password)
            # Submit the login form.
            page.click("button[type=submit]")
            # Fixed pause to let the page render. The former bare
            # `except:` retry is gone: it only fired on errors/interrupts
            # (including Ctrl-C) and then slept for another 60 seconds.
            page.wait_for_timeout(wait_ms)
            return page.inner_html(selector)
        finally:
            # Always shut the browser down, even when a step above fails.
            browser.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment