Skip to content

Instantly share code, notes, and snippets.

@ldotlopez
Created September 19, 2025 07:03
Show Gist options
  • Select an option

  • Save ldotlopez/4c1d9f0144307161e3af758d049b5509 to your computer and use it in GitHub Desktop.

Select an option

Save ldotlopez/4c1d9f0144307161e3af758d049b5509 to your computer and use it in GitHub Desktop.
Sitges scraper
import csv
import bs4
# Site root that the (normally relative) session links are resolved against.
_BASE_URL = "https://sitgesfilmfestival.com"


def _node_text(node: bs4.Tag, selector: str) -> str:
    """Return the stripped text of the first match for *selector*, or ""."""
    found = node.select_one(selector)
    return found.text.strip() if found is not None else ""


def parse_node(
    m: bs4.Tag, *, row_n: int, month: int = 10, year: int = 2025
) -> dict:
    """Convert one session list item into a spreadsheet row.

    Scraped values are whitespace-stripped. An element that is missing from
    the markup yields an empty cell instead of raising, so a single
    malformed item does not abort the whole export.

    Args:
        m: The element describing one session.
        row_n: 1-based spreadsheet row this record will occupy; it is
            interpolated into the formula cells.
        month: Month number combined with the scraped day number.
        year: Year combined with the scraped day number.

    Returns:
        A dict keyed by column title. Several values are spreadsheet
        formulas (semicolon-separated arguments) that refer to sibling
        cells by letter: B = "Dia", C = "Hora inicio",
        E = "Duración (minutos)", L = "Título", M = "URL". Those letters
        follow the ``fieldnames`` order used when the CSV is written.
    """
    from urllib.parse import urljoin

    day = _node_text(m, ".day")
    titles = [x.text.strip() for x in m.select("li.movie-title")]
    sections = [x.text.strip() for x in m.select("ul.movie-sections li")]

    # urljoin keeps already-absolute hrefs intact and inserts the "/" that
    # plain concatenation would miss for hrefs without a leading slash.
    anchor = m.select_one(".movie-title a")
    href = anchor.attrs.get("href", "") if anchor is not None else ""
    link = urljoin(_BASE_URL, href) if href else ""

    classes = m.attrs.get("class", [])
    has_talent = "has-talent" in classes
    # Any item not flagged as a single-movie session is reported as a marathon.
    is_maraton = "one-movie-session" not in classes

    return {
        "Dia semana": f'=Text(B{row_n};"dddd")',
        "Dia": f"{day}/{month:02d}/{year}" if day else "",
        "Hora inicio": _node_text(m, ".time"),
        # Minutes -> fraction of a day, added to the start time.
        "Hora fin": f"=C{row_n}+E{row_n}/60/24",
        "Duración (minutos)": _node_text(m, ".duration-text"),
        "Duración": f"=E{row_n}/24/60",
        "Título (+link)": f"=HYPERLINK(M{row_n};L{row_n})",
        "Maratón": "si" if is_maraton else "",
        "Talento": "si" if has_talent else "",
        "Cine": _node_text(m, ".location"),
        "Secciones": ", ".join(sections),
        "Título": ", ".join(titles),
        "URL": link,
    }
def main():
    """Convert a saved programme page into CSV on standard output.

    Takes one positional command-line argument, ``html``: the path of an
    HTML file, read as UTF-8. Every ``ul li.c-event-item`` element becomes
    one CSV row, preceded by a header row.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("html")
    args = parser.parse_args()

    with open(args.html, "r", encoding="utf-8") as fh:
        buff = fh.read()

    soup = bs4.BeautifulSoup(buff, features="html.parser")
    movies = soup.select("ul li.c-event-item")
    # The header takes spreadsheet row 1, so data rows are numbered from 2;
    # parse_node embeds that number in its formula cells.
    rows = (
        parse_node(node, row_n=row_n)
        for row_n, node in enumerate(movies, start=2)
    )

    # The csv writer emits its own "\r\n" terminators. Turn off newline
    # translation on stdout so platforms that translate "\n" do not produce
    # "\r\r\n". Guarded because a replaced stdout may lack reconfigure().
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(newline="")

    out = csv.DictWriter(
        sys.stdout,
        # Column order matters: parse_node's formulas address cells by letter.
        fieldnames=[
            "Dia semana",
            "Dia",
            "Hora inicio",
            "Hora fin",
            "Duración (minutos)",
            "Duración",
            "Maratón",
            "Título (+link)",
            "Talento",
            "Cine",
            "Secciones",
            "Título",
            "URL",
        ],
    )
    out.writeheader()
    out.writerows(rows)
# Run the command-line entry point only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment