#!/usr/bin/env python3
"""
Recalculate a harmonic frequency field from an HTML list of frequency tags.

Defaults are tailored for Hoshi Reader + Lapis:
- Note type: Lapis
- Source field: Frequency
- Target field: FreqSort

Computation (Yomitan-aligned):
- For each dictionary, use only the first occurrence.
- Parse the numeric prefix from the value (e.g. "12345㋕" -> 12345).
- Harmonic mean with floor: floor(n / sum(1/f)).
- If no valid values: 9999999.
"""

import argparse
import json
import math
import re
import sys
import urllib.request
from html import unescape


# Captures the inner HTML of every <li>...</li> element. The \b after "li"
# stops the pattern from also matching other tags that merely begin with
# "li" (for example <link ...>); DOTALL lets one item span several lines.
LI_RE = re.compile(r"<li\b[^>]*>(.*?)</li>", re.IGNORECASE | re.DOTALL)

# Matches a single HTML tag so markup can be stripped from an item's text.
TAG_RE = re.compile(r"<[^>]+>")

# Captures the leading run of digits of a value, skipping leading whitespace;
# anything after the digits (e.g. a trailing marker character) is ignored.
NUM_RE = re.compile(r"^\s*(\d+)")


def ac(url, action, params, timeout=30):
    """Send one AnkiConnect request and return the decoded JSON reply.

    Args:
        url: Address of the AnkiConnect HTTP endpoint.
        action: Name of the AnkiConnect action, e.g. "findNotes".
        params: JSON-serializable parameters for the action.
        timeout: Seconds to wait on the connection before giving up, so an
            unresponsive endpoint cannot hang the script forever. Pass
            ``None`` to wait indefinitely.

    Returns:
        The parsed JSON body of the response. Callers in this file read its
        "error" and "result" keys.

    Raises:
        urllib.error.URLError: (an ``OSError``) if the endpoint cannot be
            reached or the request times out.
        ValueError: if the response body is not valid JSON.
    """
    payload = json.dumps({"action": action, "version": 6, "params": params})
    req = urllib.request.Request(
        url,
        data=payload.encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())


def parse_values(html):
    """Extract one positive frequency value per dictionary from an HTML list.

    Every ``<li>`` item is reduced to plain text and split at its first
    colon into a dictionary name and a value. Only the first item seen for
    each dictionary name is considered, even when that item yields no usable
    number.

    Args:
        html: Field content holding ``<li>`` items; ``None`` or an empty
            string is treated as containing no items.

    Returns:
        A list of positive integers, one per dictionary whose first item
        starts with a number greater than zero, in document order.
    """
    values = []
    seen = set()
    for item in LI_RE.findall(html or ""):
        # Strip tags first, then decode entities: otherwise an entity such
        # as "&nbsp;" in front of the number keeps NUM_RE from matching and
        # the value is silently lost. Decoded, it is ordinary whitespace.
        text = unescape(TAG_RE.sub("", item)).strip()
        dict_name, colon, val_text = text.partition(":")
        if not colon:
            # Not a "name: value" item.
            continue
        dict_name = dict_name.strip()
        if dict_name in seen:
            continue
        # Record the name before validating the value so that later items
        # of the same dictionary never replace its first occurrence.
        seen.add(dict_name)
        m = NUM_RE.match(val_text)
        if m:
            v = int(m.group(1))
            if v > 0:
                values.append(v)
    return values


def harmonic_floor(values):
    """Return the floored harmonic mean of *values* as a decimal string.

    The result is ``floor(n / sum(1 / v))`` over the positive entries of
    *values*, where ``n`` is the number of those entries.

    Args:
        values: Frequency ranks. Entries that are zero or negative are
            ignored, because they have no meaningful reciprocal (zero would
            raise ``ZeroDivisionError``). ``None`` is treated as empty.

    Returns:
        The floored harmonic mean as a string, or ``"9999999"`` when there
        is no positive value to average.
    """
    usable = [v for v in values or () if v > 0]
    if not usable:
        return "9999999"
    return str(math.floor(len(usable) / sum(1 / v for v in usable)))


def _safe_ac(url, action, params):
    """Call ac() and report transport or decoding failures as an error reply.

    Returns the reply from ac(), or a dict with a non-empty "error" entry
    when the endpoint is unreachable, times out, or answers with invalid
    JSON, so callers can handle every failure through the same check.
    """
    try:
        return ac(url, action, params)
    except (OSError, ValueError) as exc:
        return {"result": None, "error": f"{type(exc).__name__}: {exc}"}


def main():
    """Recompute the target field of matching notes and print a summary.

    Finds every note of the selected note type, derives the floored harmonic
    frequency from the source field, and writes it to the target field when
    it differs from the stored value (unless ``--dry-run`` is given).

    Returns:
        0 on success; 1 if the notes could not be listed or loaded, or if at
        least one note failed to update.
    """
    parser = argparse.ArgumentParser(
        description="Recalculate frequency harmonic rank for Anki notes."
    )
    parser.add_argument("--url", default="http://localhost:8765", help="AnkiConnect URL")
    parser.add_argument("--note-type", default="Lapis", help="Note type name")
    parser.add_argument("--freq-field", default="Frequency", help="Source field name")
    parser.add_argument("--target-field", default="FreqSort", help="Target field name")
    parser.add_argument(
        "--only-empty",
        action="store_true",
        help="Only update notes where target field is empty",
    )
    parser.add_argument("--dry-run", action="store_true", help="Do not write changes")
    parser.add_argument("--limit", type=int, default=0, help="Limit number of notes processed")

    args = parser.parse_args()

    query = f'note:"{args.note_type}"'
    res = _safe_ac(args.url, "findNotes", {"query": query})
    if res.get("error"):
        print("findNotes error:", res, file=sys.stderr)
        return 1

    note_ids = res.get("result") or []
    # Only a positive limit truncates the list. A negative value used as a
    # slice bound would silently drop notes from the end instead.
    if args.limit > 0:
        note_ids = note_ids[: args.limit]

    if not note_ids:
        print("No notes found.")
        return 0

    res = _safe_ac(args.url, "notesInfo", {"notes": note_ids})
    if res.get("error"):
        print("notesInfo error:", res, file=sys.stderr)
        return 1

    infos = res.get("result") or []
    updated = 0
    mismatched = 0
    errors = 0

    for n in infos:
        fields = n.get("fields", {})
        # Skip notes that lack either field.
        if args.freq_field not in fields or args.target_field not in fields:
            continue

        old_val = (fields[args.target_field]["value"] or "").strip()
        if args.only_empty and old_val:
            continue

        values = parse_values(fields[args.freq_field]["value"] or "")
        new_val = harmonic_floor(values)

        if old_val == new_val:
            continue

        # Counted in dry-run mode too, so the summary shows what would change.
        mismatched += 1
        if args.dry_run:
            continue

        res = _safe_ac(
            args.url,
            "updateNoteFields",
            {"note": {"id": n["noteId"], "fields": {args.target_field: new_val}}},
        )
        if res.get("error"):
            errors += 1
            # Report only the first few failures to keep stderr readable.
            if errors <= 5:
                print("updateNoteFields error:", res, file=sys.stderr)
            continue
        updated += 1

    print(f"notes: {len(infos)}")
    print(f"mismatched: {mismatched}")
    print(f"updated: {updated}")
    print(f"errors: {errors}")
    if args.dry_run:
        print("dry-run: no changes written")

    # A run with failed writes must not look successful to a calling script.
    return 1 if errors else 0


# Run the command-line tool only when this file is executed as a script;
# the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())