Skip to content

Instantly share code, notes, and snippets.

@marcusmueller
Created April 11, 2025 10:33
Show Gist options
  • Select an option

  • Save marcusmueller/5e487fdea98bce7f4b53924ee98c3861 to your computer and use it in GitHub Desktop.

Select an option

Save marcusmueller/5e487fdea98bce7f4b53924ee98c3861 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Copyright 2025 Marcus Müller
# SPDX-License-Identifier: EUPL-1.2
#
from bs4 import BeautifulSoup
from itertools import chain
import logging
import re
import sys
def _find_matches(soup, regex):
    """Return the strings in the <title> and <body> of *soup* selected by *regex*.

    Title matches come first, then body matches.  A document without a
    <head>, <title> or <body> simply contributes nothing for that part;
    this is checked explicitly rather than by swallowing AttributeError.
    """
    head = soup.head
    title = head.title if head is not None else None
    findings = []
    for element in (title, soup.body):
        if element is not None:
            findings += element.find_all(string=regex)
    return findings


def _grep_file(filename, regex):
    """Parse the HTML file *filename* and print one line per matching string.

    Each output line is the file name plus a colon, left-aligned and padded
    to 20 columns, followed by the text of the match.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The parser consumes the whole handle in the constructor, so the file
    # can be closed before the tree is searched.
    with open(filename) as file_handle:
        soup = BeautifulSoup(file_handle, "html.parser")
    for finding in _find_matches(soup, regex):
        print(f"{filename+':':20s}{finding.text}")


def main(argv=None):
    """Search the text of HTML files for a regular expression.

    Args:
        argv: Argument vector in the form ``[PROG, REGEX, FILE1, ...]``.
            Defaults to ``sys.argv``.

    Returns:
        Process exit status: 1 on a usage error or an invalid REGEX,
        0 otherwise.  Files that cannot be read or parsed are skipped with
        a log message and do not affect the exit status.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        logging.error(
            "USAGE:\n%s REGEX FILE1 [FILE2…]",
            argv[0] if argv else "PROG",
        )
        return 1
    try:
        regex = re.compile(argv[1])
    except re.error as err:
        logging.error("invalid REGEX %r: %s", argv[1], err)
        return 1
    for filename in argv[2:]:
        try:
            _grep_file(filename, regex)
        except OSError as err:
            # Missing or unreadable file: keep going with the other files.
            logging.warning("%s: skipped (%s)", filename, err)
        except Exception:
            # Best effort per file (e.g. undecodable or unparsable input),
            # but leave a trace instead of failing silently.  Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit through.
            logging.exception("%s: skipped, could not be processed", filename)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment