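"""Year-to-date top-1000 pageview report for Swedish Wikipedia.

Queries the Wikimedia Pageviews REST API for each month's most-viewed
articles on sv.wikipedia.org, sums the views across the year so far,
filters out non-article pages, and writes the 1000 most-viewed titles
(with peak month and article URL) to a CSV file.
"""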
import csv
import urllib.parse
from collections import Counter, defaultdict
from datetime import date

import requests

YEAR = 2025
OUTPUT_CSV = f"svwiki_topviews_{YEAR}_ytd_top1000.csv"
# Wikimedia's API etiquette asks clients to send a descriptive User-Agent.
HEADERS = {
    "accept": "application/json",
    "User-Agent": "wmse-viewcount-script",
}
BASE_ARTICLE_URL = "https://sv.wikipedia.org/wiki/"

today = date.today()
totals = Counter()
monthly_views = defaultdict(lambda: [0] * 13)  # index 1–12 used


def should_exclude(title: str) -> bool:
    """Return True for non-article pages that should not be ranked."""
    if title.startswith("Portal:"):
        return True
    if title == "wiki.phtml":
        return True
    if title.startswith("Fil:"):  # file namespace on svwiki
        return True
    if title.startswith("Special:"):
        return True
    return False
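

# Fetch each month's top-viewed articles and accumulate the counts.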
for month in range(1, 13):
    if YEAR == today.year and month > today.month:
        print(f"Skipping {YEAR}-{month:02d}: future month")
        continue
    url = (
        f"https://wikimedia.org/api/rest_v1/metrics/pageviews/top/"
        f"sv.wikipedia.org/all-access/{YEAR}/{month:02d}/all-days"
    )
    print(f"Fetching {url} ...")
    try:
        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
    except requests.exceptions.HTTPError as e:
        if r.status_code == 404:
            print(f"Skipping {YEAR}-{month:02d}: data not yet available (404)")
            continue
        if r.status_code == 403:
            print(f"Skipping {YEAR}-{month:02d}: forbidden (403)")
            continue
        print(f"Skipping {YEAR}-{month:02d}: HTTP error {e}")
        continue
    except requests.exceptions.RequestException as e:
        print(f"Skipping {YEAR}-{month:02d}: network error {e}")
        continue
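    # A successful response has roughly this shape (abridged):
    #   {"items": [{"project": "sv.wikipedia", ..., "articles":
    #       [{"article": "...", "views": 1234, "rank": 1}, ...]}]}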
    data = r.json()
    if not data.get("items"):
        print(f"Skipping {YEAR}-{month:02d}: no items in response")
        continue
    articles = data["items"][0].get("articles", [])
    for article in articles:
        title = article["article"]
        views = article["views"]
        if should_exclude(title):
            continue
        totals[title] += views
        monthly_views[title][month] += views
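
# Keep only the 1000 most-viewed articles for the year so far.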
sorted_articles = totals.most_common(1000)

with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["rank", "title", "views", "peak_month", "url"])
    for rank, (title, views) in enumerate(sorted_articles, start=1):
        # Determine the month with the most views;
        # monthly_views[title] is a list of 13 ints, index 1–12 used.
        month_list = monthly_views[title]
        best_month = max(range(1, 13), key=lambda m: month_list[m])
        peak_month_str = f"{YEAR}-{best_month:02d}"
        # Build the article URL: underscores for spaces, then percent-encode.
        url_title = urllib.parse.quote(title.replace(" ", "_"), safe="")
        url = f"{BASE_ARTICLE_URL}{url_title}"
        writer.writerow([rank, title, views, peak_month_str, url])
print(f"\nWrote top {len(sorted_articles)} rows to {OUTPUT_CSV}\n")
print("Top 20 articles this year so far (after exclusions):\n")
for rank, (title, views) in enumerate(sorted_articles[:20], start=1):
# Same peak-month logic for console output
month_list = monthly_views[title]
best_month = max(range(1, 13), key=lambda m: month_list[m])
peak_month_str = f"{YEAR}-{best_month:02d}"
print(f"{rank}\t{title}\t{views}\t{peak_month_str}")