# chandradeoarya/blogger-scrapper-csv.py

## blogger-scrapper-csv.py
import csv

from bs4 import BeautifulSoup

def extract_first_image_url(html_string):
    """Return the ``src`` attribute of the first ``<img>`` tag in *html_string*.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
    only the first ``<img>`` element found is inspected.

    Args:
        html_string: HTML markup to search.

    Returns:
        Whatever ``Tag.get('src')`` yields for the first ``<img>`` tag, or
        ``None`` when the markup contains no ``<img>`` tag.
    """
    first_img = BeautifulSoup(html_string, 'html.parser').find('img')

    # Guard clause: nothing to extract when the markup has no <img> tag.
    if not first_img:
        return None
    return first_img.get('src')

def remove_spaces_and_quote(input_string):
    """Return *input_string* without surrounding whitespace or single quotes.

    Leading and trailing whitespace is stripped first, then every single
    quote (``'``) is removed. Because stripping happens before the quotes are
    removed, whitespace that sat just inside a quote is kept.

    Args:
        input_string: The text to clean.

    Returns:
        The cleaned string.
    """
    cleaned_string = input_string.strip().replace("'", '')
    # The original computed this value and then dropped it, so the function
    # always returned None; hand the result back to the caller.
    return cleaned_string

csv_filename = "filename.csv"

# Export the articles to CSV: one header row, then one row per article.
# newline='' is what the csv module documentation asks for when opening the
# output file, so the writer controls line endings itself.
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['post_image', 'post_title', 'post_content', 'published_date'])

    # NOTE(review): `articles` is not defined in the visible part of this
    # file; presumably it is built elsewhere before this point. Confirm.
    for article in articles:
        # First image found in the post body (None when there is none).
        img = extract_first_image_url(article.content)
        title, content = article.title, article.content
        # Compact timestamp, formatted as YYYYMMDDHHMMSS.
        published_date = article.published_date.strftime('%Y%m%d%H%M%S')
        csv_writer.writerow([img, title, content, published_date])
	import csv

	from bs4 import BeautifulSoup

	def extract_first_image_url(html_string):
		"""Return the ``src`` attribute of the first ``<img>`` tag in *html_string*.

		The markup is parsed with BeautifulSoup's ``html.parser`` backend.
		Returns ``None`` when the markup contains no ``<img>`` tag.
		"""
		# Indentation restored: this copy had its body flattened to the same
		# level as the ``def`` line, which Python rejects.
		soup = BeautifulSoup(html_string, 'html.parser')
		img_tag = soup.find('img')

		if img_tag:
			img_url = img_tag.get('src')
			return img_url
		else:
			return None

	def remove_spaces_and_quote(input_string):
		"""Return *input_string* without surrounding whitespace or single quotes.

		Leading and trailing whitespace is stripped first, then every single
		quote (``'``) is removed.
		"""
		# Indentation restored (the body had been flattened to the ``def``
		# level), and the cleaned value is now returned instead of discarded.
		cleaned_string = input_string.strip().replace("'", '')
		return cleaned_string

	csv_filename = "filename.csv"

	# Export the articles to CSV: one header row, then one row per article.
	# Indentation restored: this copy had the ``with`` and ``for`` bodies
	# flattened to a single level, which Python rejects.
	with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
		csv_writer = csv.writer(csvfile)

		# Write header row
		csv_writer.writerow(['post_image', 'post_title', 'post_content', 'published_date'])
		# NOTE(review): `articles` is not defined in the visible part of this
		# file; presumably it is built elsewhere. Confirm.
		for article in articles:
			img = extract_first_image_url(article.content)
			title = article.title
			content = article.content
			# Compact timestamp, formatted as YYYYMMDDHHMMSS.
			published_date = article.published_date.strftime('%Y%m%d%H%M%S')
			csv_writer.writerow([img, title, content, published_date])
# No results found