Skip to content

Instantly share code, notes, and snippets.

@CheeseCake87
Last active February 13, 2025 12:18
Show Gist options
  • Select an option

  • Save CheeseCake87/6a6f2388b2fe95c2d5f5895bef95eaba to your computer and use it in GitHub Desktop.

Select an option

Save CheeseCake87/6a6f2388b2fe95c2d5f5895bef95eaba to your computer and use it in GitHub Desktop.
Beautifulsoup4 + Selenium Example
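"""
Look up Pimoroni product titles for a list of stock codes with
Selenium + BeautifulSoup4 and save the results to stock.json.

Setup:
    python3 -m venv .venv
    source .venv/bin/activate

Requirements:
    pip install beautifulsoup4 selenium
"""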
import json
from bs4 import BeautifulSoup
from pathlib import Path
from selenium import webdriver
from time import sleep
stock = {
"PIM013": 145,
"PIM019": 213,
"PIM054": 477,
"PIM058": 114,
"PIM060": 8,
"PIM063": 141,
"PIM064": 85,
"PIM065": 2,
"PIM086": 2,
"PIM095": 94,
"PIM113": 54,
"PIM132": 50,
"PIM137": 73,
"PIM141": 555,
"PIM167": 28,
"PIM172": 216,
"PIM182": 13,
"PIM216": 15,
"PIM266": 118,
"PIM309": 32,
"PIM320": 1,
"PIM332": 22,
"PIM333": 216,
"PIM336": 192,
"PIM339": 31,
"PIM340": 48,
"PIM341": 52,
"PIM342": 65,
"PIM343": 38,
"PIM344": 18,
"PIM345": 24,
"PIM354": 145,
"PIM401": 446,
"PIM405": 11,
"PIM415": 8,
"PIM450": 58,
"PIM451": 97,
"PIM457": 61,
"PIM540": 255
}
# Search-page template; `{item}` is filled in with a product code.
url = "https://shop.pimoroni.com/collections/all?q={item}&retired=true"

# Directory the script is run from; stock.json / stock.txt are placed here.
cwd = Path.cwd()
def process_urls():
    """Look up every stock code on the Pimoroni shop and save the results.

    For each key of the module-level ``stock`` mapping, the search page built
    from ``url`` is loaded in a Chrome WebDriver session and the rendered
    HTML is parsed with BeautifulSoup. The text of the ``<h3>`` inside the
    first ``div.title-description`` element is stored as the item name,
    together with the stock count and the browser's current URL. Codes for
    which no such element (or no ``<h3>`` inside it) exists are stored with
    ``"Not Found"`` as both item name and URL.

    The collected mapping is written to ``stock.json`` in ``cwd``. Nothing is
    written if an error interrupts the run; the browser is closed either way.

    Raises:
        AssertionError: If a loaded page's title does not contain
            ``'All - Pimoroni'``.
    """
    json_output = {}

    browser = webdriver.Chrome()
    try:
        browser.get("https://google.com")

        for k, v in stock.items():
            browser.get(url.format(item=k))
            assert 'All - Pimoroni' in browser.title

            # Pause before grabbing the page source -- presumably to give
            # the results time to render.
            sleep(1)

            response = browser.page_source
            bs = BeautifulSoup(response, "html.parser")

            found = bs.find("div", {"class": "title-description"})
            # find() returns None when nothing matches; guard both lookups so
            # a result block without a heading cannot crash on `.text`.
            title = found.find("h3") if found is not None else None

            if title is None:
                json_output[k] = {
                    "item": "Not Found",
                    "stock": v,
                    "url": "Not Found"
                }
                continue

            json_output[k] = {
                "item": title.text,
                "stock": v,
                "url": browser.current_url
            }

            # Second pause between lookups -- presumably to throttle requests.
            sleep(1)
    finally:
        # Always shut the WebDriver session down so no Chrome/driver process
        # is left running, even when a lookup fails part-way through.
        browser.quit()

    json_file = cwd / "stock.json"
    json_file.write_text(json.dumps(json_output, indent=1))
def output_json():
    """Render ``stock.json`` as a plain-text report in ``stock.txt``.

    Reads ``stock.json`` from ``cwd`` and writes one paragraph per entry to
    ``stock.txt`` in the same directory::

        <code>: <item name with ':' characters removed>
        Stock: <stock>
        URL: <url>

    Each paragraph is followed by a blank line. Both files are handled as
    UTF-8 so item names containing non-ASCII characters can be written
    regardless of the platform's default encoding.
    """
    json_file = cwd / "stock.json"
    entries = json.loads(json_file.read_text(encoding="utf-8"))

    # Build all paragraphs first and join once instead of growing a string
    # with += inside the loop.
    paragraphs = [
        f"{code}: {entry['item'].replace(':', '')}\n"
        f"Stock: {entry['stock']}\n"
        f"URL: {entry['url']}\n\n"
        for code, entry in entries.items()
    ]

    txt_file = cwd / "stock.txt"
    txt_file.write_text("".join(paragraphs), encoding="utf-8")
# Script entry point: running the file performs the scrape only;
# output_json() is not invoked from here.
if __name__ == "__main__":
    process_urls()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment