"""betaflix-catalog — Sonarr/Radarr OnImport webhook receiver. Listens for OnImport events from Sonarr and Radarr, edits `playbooks/import-media/MEDIA-LIST.md` in the beta-flix Forgejo repo, writes a per-import run log, and commits + pushes as `obsidian-ai`. POST endpoints: /sonarr Sonarr Connect webhook target. /radarr Radarr Connect webhook target. /healthz Liveness probe. Idempotency: payload-hash cache at /state/seen-imports.json. Duplicates skipped. Environment: FORGEJO_REMOTE e.g. https://git.s8n.ru/s8n/beta-flix.git FORGEJO_PUSH_TOKEN PAT — embedded into the push URL. GIT_AUTHOR_NAME obsidian-ai GIT_AUTHOR_EMAIL obsidian-ai@s8n.ru LISTEN_PORT 5055 """ from __future__ import annotations import hashlib import json import logging import os import re import subprocess import sys import threading from datetime import datetime, timezone from pathlib import Path from typing import Any from flask import Flask, jsonify, request from jinja2 import Environment, FileSystemLoader, select_autoescape # --- Config ----------------------------------------------------------------- REPO_PATH = Path(os.environ.get("REPO_PATH", "/repo")) STATE_DIR = Path(os.environ.get("STATE_DIR", "/state")) TEMPLATES_DIR = Path(__file__).parent / "templates" FORGEJO_REMOTE = os.environ.get("FORGEJO_REMOTE", "https://git.s8n.ru/s8n/beta-flix.git") FORGEJO_TOKEN = os.environ.get("FORGEJO_PUSH_TOKEN", "") GIT_AUTHOR_NAME = os.environ.get("GIT_AUTHOR_NAME", "obsidian-ai") GIT_AUTHOR_EMAIL = os.environ.get("GIT_AUTHOR_EMAIL", "obsidian-ai@s8n.ru") LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "5055")) MEDIA_LIST = REPO_PATH / "playbooks" / "import-media" / "MEDIA-LIST.md" RUNS_DIR = REPO_PATH / "playbooks" / "import-media" / "runs" SEEN_PATH = STATE_DIR / "seen-imports.json" # Section headers in MEDIA-LIST.md the bot will edit. 
MOVIES_SECTION = "## Movies"
TV_SECTION = "## TV"

# Webhook event types that carry an actual import, and harmless probe events.
_IMPORT_EVENTS = ("Import", "Download")
_PROBE_EVENTS = ("Test", "ApplicationUpdate")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    stream=sys.stdout,
)
log = logging.getLogger("catalog")

app = Flask(__name__)
# Serialises every operation on the git working tree (pull, edit, commit, push).
_lock = threading.Lock()
# Serialises read-modify-write cycles on the seen-imports cache so concurrent
# requests cannot overwrite each other's entries.
_seen_lock = threading.Lock()
_jinja = Environment(
    loader=FileSystemLoader(str(TEMPLATES_DIR)),
    autoescape=select_autoescape(["html", "xml"]),
    trim_blocks=True,
    lstrip_blocks=True,
)


# --- Idempotency ------------------------------------------------------------

def _load_seen() -> set[str]:
    """Return the set of already-processed import hashes.

    A missing cache file yields an empty set; an unreadable or malformed one
    is logged and treated as empty.
    """
    if not SEEN_PATH.exists():
        return set()
    try:
        return set(json.loads(SEEN_PATH.read_text(encoding="utf-8")))
    except (OSError, ValueError, TypeError):
        # ValueError covers invalid JSON and bad UTF-8; TypeError covers JSON
        # that is valid but not a collection of hashable items.
        log.warning("seen-imports.json corrupt; resetting")
        return set()


def _save_seen(seen: set[str]) -> None:
    """Persist `seen` as a sorted JSON list, replacing the cache atomically."""
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    # Write to a sibling file and rename so a crash mid-write cannot leave a
    # truncated cache behind.
    tmp_path = SEEN_PATH.with_name(SEEN_PATH.name + ".tmp")
    tmp_path.write_text(json.dumps(sorted(seen)), encoding="utf-8")
    tmp_path.replace(SEEN_PATH)


def _mark_seen(payload_hash: str) -> None:
    """Add `payload_hash` to the cache, re-reading it under the lock first."""
    with _seen_lock:
        seen = _load_seen()
        seen.add(payload_hash)
        _save_seen(seen)


def _payload_hash(kind: str, payload: dict[str, Any]) -> str:
    """Stable hash for the import event — series:season:episode or movie id.

    Returns the first 16 hex characters of a SHA-256 digest. JSON `null`
    values for `series`, `movie`, `episodes` or a single episode are treated
    like missing keys.
    """
    if kind == "sonarr":
        sid = (payload.get("series") or {}).get("id", "?")
        eps = payload.get("episodes") or [{}]
        keys = sorted(
            f"{(e or {}).get('seasonNumber', '?')}x{(e or {}).get('episodeNumber', '?')}"
            for e in eps
        )
        seed = f"sonarr:{sid}:{','.join(keys)}"
    elif kind == "radarr":
        mid = (payload.get("movie") or {}).get("id", "?")
        seed = f"radarr:{mid}"
    else:
        seed = f"unknown:{json.dumps(payload, sort_keys=True)}"
    return hashlib.sha256(seed.encode()).hexdigest()[:16]


# --- Git helpers ------------------------------------------------------------

def _redact(text: Any) -> Any:
    """Mask the push token inside `text`; non-strings pass through untouched."""
    if FORGEJO_TOKEN and isinstance(text, str):
        return text.replace(FORGEJO_TOKEN, "***")
    return text


def _run_checked(
    argv: list[str],
    cwd: Path | None = None,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess:
    """Run `argv`, capturing text output, and raise on a non-zero exit.

    Raises:
        subprocess.CalledProcessError: with the push token masked in the
            command line, stdout and stderr, because callers log the error
            and return its stderr to the HTTP client.
    """
    try:
        return subprocess.run(
            argv, cwd=cwd, env=env, check=True, capture_output=True, text=True,
        )
    except subprocess.CalledProcessError as exc:
        cmd = exc.cmd
        safe_cmd = [_redact(part) for part in cmd] if isinstance(cmd, (list, tuple)) else _redact(cmd)
        # `from None` drops the original exception (which still carries the
        # token) from the traceback that log.exception() prints.
        raise subprocess.CalledProcessError(
            exc.returncode,
            safe_cmd,
            output=_redact(exc.stdout),
            stderr=_redact(exc.stderr),
        ) from None


def _git(*args: str, cwd: Path = REPO_PATH) -> subprocess.CompletedProcess:
    """Run `git <args>` in `cwd` with the bot identity as a fallback.

    The author/committer variables are only set when the process environment
    does not already define them.

    Raises:
        subprocess.CalledProcessError: if git exits non-zero.
    """
    env = os.environ.copy()
    env.setdefault("GIT_AUTHOR_NAME", GIT_AUTHOR_NAME)
    env.setdefault("GIT_AUTHOR_EMAIL", GIT_AUTHOR_EMAIL)
    env.setdefault("GIT_COMMITTER_NAME", GIT_AUTHOR_NAME)
    env.setdefault("GIT_COMMITTER_EMAIL", GIT_AUTHOR_EMAIL)
    return _run_checked(["git", *args], cwd=cwd, env=env)


def _ensure_repo() -> None:
    """Clone the repo if /repo is empty."""
    if (REPO_PATH / ".git").is_dir():
        return
    REPO_PATH.mkdir(parents=True, exist_ok=True)
    # NOTE(review): cloning with the token-bearing URL stores that URL as the
    # `origin` remote in .git/config; later fetches appear to rely on it.
    _run_checked(["git", "clone", _push_url(), str(REPO_PATH)])


def _push_url() -> str:
    """Return the remote URL, with the token as userinfo for https remotes."""
    if FORGEJO_TOKEN and FORGEJO_REMOTE.startswith("https://"):
        return FORGEJO_REMOTE.replace("https://", f"https://{FORGEJO_TOKEN}@", 1)
    return FORGEJO_REMOTE


def _pull_rebase() -> None:
    """Fetch `origin` and rebase the local branch onto `origin/main`.

    Raises:
        subprocess.CalledProcessError: if the fetch or the rebase fails. A
            failed rebase is aborted first so the checkout is not left
            mid-rebase for the next request.
    """
    _git("fetch", "origin")
    try:
        _git("rebase", "origin/main")
    except subprocess.CalledProcessError:
        try:
            _git("rebase", "--abort")
        except subprocess.CalledProcessError:
            # Nothing to abort, e.g. the rebase refused to start.
            log.warning("git rebase --abort failed; checkout may need manual repair")
        raise


def _commit_and_push(title: str) -> str:
    """Stage the catalog files, commit them and push to `main`.

    Returns:
        The new HEAD sha, or "" when nothing was staged.

    Raises:
        subprocess.CalledProcessError: if any git command fails.
    """
    candidates = ("playbooks/import-media/MEDIA-LIST.md", "playbooks/import-media/runs/")
    # `git add` aborts on a pathspec that matches nothing, and a missing
    # MEDIA-LIST.md is tolerated elsewhere, so only stage what exists.
    paths = [p for p in candidates if (REPO_PATH / p).exists()]
    if paths:
        _git("add", *paths)
    # Ask the index, not the working tree: unrelated untracked files must not
    # trigger a commit that would then fail with "nothing to commit".
    staged = _git("diff", "--cached", "--name-only")
    if not staged.stdout.strip():
        log.info("no changes to commit (%s)", title)
        return ""
    msg = f"catalog: add {title}"
    _git("commit", "-m", msg, f"--author={GIT_AUTHOR_NAME} <{GIT_AUTHOR_EMAIL}>")
    _git("push", _push_url(), "HEAD:main")
    return _git("rev-parse", "HEAD").stdout.strip()


# --- MEDIA-LIST.md editing --------------------------------------------------

def _normalise_title(title: str) -> str:
    """Trim `title` and collapse internal whitespace runs to single spaces."""
    return re.sub(r"\s+", " ", title.strip())


def _slugify(s: str) -> str:
    """Return a lowercase, dash-separated ASCII slug of at most 80 characters."""
    s = re.sub(r"[^a-zA-Z0-9]+", "-", s.lower()).strip("-")
    return s[:80] or "untitled"


def _row(kind: str, title: str, year: int | None, source: str) -> str:
    """Build the markdown table row for one catalog entry.

    `kind == "tv"` yields a TV row; any other value yields a Movie row.
    """
    # Without a year the label is just the title (no stray double space).
    label = f"{title} ({year})" if year else title
    media_type = "TV" if kind == "tv" else "Movie"
    return f"| {label} | {media_type} | {source} | _todo_ | "


def _heading_level(line: str) -> int:
    """Return the markdown heading level of `line`, or 0 if it is no heading."""
    stripped = line.lstrip()
    hashes = len(stripped) - len(stripped.lstrip("#"))
    if 0 < hashes <= 6 and stripped[hashes:hashes + 1] in ("", " "):
        return hashes
    return 0


def _first_cell(line: str) -> str:
    """Return the stripped first column of a markdown table row."""
    parts = line.split("|")
    return parts[1].strip() if len(parts) > 1 else ""


def _cell_key(text: str) -> str:
    """Normalise a cell or key for comparison: one-space gaps, case-folded."""
    return re.sub(r"\s+", " ", text.strip()).casefold()


def _insert_alphabetic(section_header: str, row: str, key: str) -> bool:
    """Insert `row` into the section under `section_header`, alphabetic by key.

    Only the table that belongs to the section is considered: the search
    stops at the next heading of the same or a higher level. A row counts as
    present when its first column equals `key` (case- and
    whitespace-insensitive), not when `key` merely occurs somewhere in it.

    Returns True if a new row was added, False if the key already existed or
    the file, section or table could not be found.
    """
    if not MEDIA_LIST.exists():
        log.warning("MEDIA-LIST.md missing at %s — skipping insert", MEDIA_LIST)
        return False

    lines = MEDIA_LIST.read_text(encoding="utf-8").splitlines()
    start = next(
        (i for i, line in enumerate(lines) if line.strip() == section_header),
        None,
    )
    if start is None:
        log.warning("section %r not found in MEDIA-LIST.md", section_header)
        return False

    # Find the table header, without wandering into the next section.
    section_level = _heading_level(section_header)
    header_idx = None
    for i in range(start + 1, len(lines)):
        if lines[i].lstrip().startswith("|"):
            header_idx = i
            break
        level = _heading_level(lines[i])
        if section_level and 0 < level <= section_level:
            break
    if header_idx is None:
        log.warning("no table found under section %r", section_header)
        return False

    # Skip the header row and, when present, the separator row.
    rows_start = header_idx + 1
    if rows_start < len(lines) and lines[rows_start].lstrip().startswith("|"):
        rows_start += 1
    rows_end = rows_start
    while rows_end < len(lines) and lines[rows_end].lstrip().startswith("|"):
        rows_end += 1

    wanted = _cell_key(key)
    cell_keys = [_cell_key(_first_cell(lines[j])) for j in range(rows_start, rows_end)]
    if wanted in cell_keys:
        log.info("row already present for key=%r — skipping", key)
        return False

    # Alphabetic insert by first column; append when no row sorts after it.
    insert_at = next(
        (rows_start + offset for offset, cell in enumerate(cell_keys) if wanted < cell),
        rows_end,
    )
    lines.insert(insert_at, row)
    MEDIA_LIST.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return True


def _write_run_log(slug: str, ctx: dict[str, Any]) -> Path:
    """Render `run.md.j2` with `ctx` into `runs/<slug>.md` and return its path."""
    RUNS_DIR.mkdir(parents=True, exist_ok=True)
    template = _jinja.get_template("run.md.j2")
    out = RUNS_DIR / f"{slug}.md"
    out.write_text(template.render(**ctx), encoding="utf-8")
    return out


# --- Webhook handlers -------------------------------------------------------

def _publish_import(section: str, row_kind: str, key: str, slug: str,
                    ctx: dict[str, Any]) -> tuple[str, bool]:
    """Update the catalog for one import and push the result.

    Under the repo lock: sync the checkout, insert the table row, write the
    run log (with `ts` and `row_added` added to `ctx`), then commit and push.

    Returns:
        `(sha, added)` — the pushed commit sha ("" if nothing changed) and
        whether a new table row was inserted.
    """
    with _lock:
        _ensure_repo()
        _pull_rebase()
        row = _row(row_kind, ctx["title"], ctx["year"], ctx["source"])
        added = _insert_alphabetic(section, row, key)
        _write_run_log(slug, {
            **ctx,
            "ts": datetime.now(timezone.utc).isoformat(timespec="seconds"),
            "row_added": added,
        })
        sha = _commit_and_push(key)
    return sha, added


def _handle_sonarr(payload: dict[str, Any]) -> tuple[str, bool]:
    """Catalog a Sonarr import; returns `(commit sha, row added)`."""
    series = payload.get("series") or {}
    # `or` also covers an explicit JSON null title.
    title = _normalise_title(series.get("title") or "Unknown Series")
    year = series.get("year") or None
    eps = payload.get("episodes") or []
    files = payload.get("episodeFiles") or payload.get("episodeFile") or []
    if isinstance(files, dict):
        files = [files]
    first_file = files[0] if files and isinstance(files[0], dict) else {}
    source = first_file.get("sceneName") or first_file.get("relativePath") or "?"
    key = f"{title} ({year})" if year else title

    # The run log is named after the first episode of the import.
    first_ep = eps[0] if eps and isinstance(eps[0], dict) else None
    if first_ep is not None:
        slug = _slugify(
            f"{key}-S{first_ep.get('seasonNumber', '?')}E{first_ep.get('episodeNumber', '?')}"
        )
    else:
        slug = _slugify(key)

    return _publish_import(TV_SECTION, "tv", key, slug, {
        "kind": "tv",
        "title": title,
        "year": year,
        "source": source,
        "episodes": eps,
    })


def _handle_radarr(payload: dict[str, Any]) -> tuple[str, bool]:
    """Catalog a Radarr import; returns `(commit sha, row added)`."""
    movie = payload.get("movie") or {}
    title = _normalise_title(movie.get("title") or "Unknown Movie")
    year = movie.get("year") or None
    mfile = payload.get("movieFile") or {}
    source = mfile.get("sceneName") or mfile.get("relativePath") or "?"
    key = f"{title} ({year})" if year else title

    return _publish_import(MOVIES_SECTION, "movie", key, _slugify(key), {
        "kind": "movie",
        "title": title,
        "year": year,
        "source": source,
    })


# --- Flask routes -----------------------------------------------------------

def _process_webhook(kind: str, handler):
    """Shared request flow for the Sonarr and Radarr endpoints.

    Args:
        kind: "sonarr" or "radarr"; used for hashing and log messages.
        handler: callable taking the payload dict and returning
            `(sha, added)`.

    Returns:
        A `(json response, status)` pair: 200 for probes, ignored events,
        duplicates and successful imports; 500 when the handler fails.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        # Missing, invalid or non-object JSON is treated as an empty event.
        payload = {}
    event = payload.get("eventType", "")

    if event in _PROBE_EVENTS:
        log.info("%s probe event=%s — ack", kind, event)
        return jsonify(ok=True, ignored=event), 200
    if event not in _IMPORT_EVENTS:
        log.info("%s event=%s — ignored", kind, event)
        return jsonify(ok=True, ignored=event), 200

    h = _payload_hash(kind, payload)
    with _seen_lock:
        duplicate = h in _load_seen()
    if duplicate:
        log.info("%s duplicate event hash=%s — skipping", kind, h)
        return jsonify(ok=True, duplicate=h), 200

    try:
        sha, added = handler(payload)
    except subprocess.CalledProcessError as e:
        # stderr reaches us with the push token already masked.
        log.exception("git failed: %s", e.stderr)
        return jsonify(ok=False, error=e.stderr), 500
    except Exception as e:  # noqa: BLE001
        log.exception("%s handler crashed", kind)
        return jsonify(ok=False, error=str(e)), 500

    # Only successful imports are remembered, so failures can be retried.
    _mark_seen(h)
    return jsonify(ok=True, sha=sha, row_added=added), 200


@app.get("/healthz")
def healthz():
    """Liveness probe."""
    return jsonify(ok=True), 200


@app.post("/sonarr")
def sonarr():
    """Sonarr Connect webhook target."""
    return _process_webhook("sonarr", _handle_sonarr)


@app.post("/radarr")
def radarr():
    """Radarr Connect webhook target."""
    return _process_webhook("radarr", _handle_radarr)


if __name__ == "__main__":
    log.info("betaflix-catalog listening on 0.0.0.0:%d", LISTEN_PORT)
    app.run(host="0.0.0.0", port=LISTEN_PORT)