#!/usr/bin/env python3 """Subtitle fetcher v3 — Addic7ed via subliminal. Free, no daily quota. Uses OpenSubtitles REST (search-only, no downloads, no quota burn) to translate library S/E numbering to the show's primary catalogue numbering (e.g. Hulu→Fox for American Dad), then drives subliminal's addic7ed provider for the actual download. Why v3: OS REST `/download` is capped at 20/day on free tier. Addic7ed serves anonymous downloads with no daily limit. v2 (lib/sub-rest-fetch.py) remains the right tool when quota isn't the bottleneck — addic7ed has narrower coverage than OpenSubtitles (English only, mostly). Picker: subliminal's own scoring against the matched Video (filename, S/E, year). For AD, addic7ed catalogues by Fox airing order, so the script remaps library Hulu numbering via per-ep IMDB id lookup on OS REST. Usage: sub-a7d-fetch.py --season N [--start E] [--end E] sub-a7d-fetch.py --all Env (required): JELLYFIN_TOKEN X-Emby-Token for nullstone Jellyfin OPENSUBTITLES_API_KEY Path to file holding the OS REST key (search only) Env (optional): NULLSTONE SSH target, default user@192.168.0.100 DRY_RUN=1 search + remap only, no download """ from __future__ import annotations import argparse import json import os import re import shlex import subprocess import sys import tempfile import urllib.parse from babelfish import Language from subliminal import (Video, region, list_subtitles, download_subtitles, save_subtitles) OS_BASE = "https://api.opensubtitles.com/api/v1" USER_AGENT = "arrflix v1.0.0" JF_BASE = "http://localhost:8096" NULLSTONE = os.environ.get("NULLSTONE", "user@192.168.0.100") region.configure("dogpile.cache.memory") def die(msg: str, code: int = 1) -> None: print(f"ERROR: {msg}", file=sys.stderr) sys.exit(code) def env_or_die(name: str) -> str: v = os.environ.get(name) if not v: die(f"{name} not set") return v def load_api_key() -> str: path = env_or_die("OPENSUBTITLES_API_KEY") with open(path) as f: return f.read().strip() def jellyfin(path: str, params: dict | None = None) -> dict: tok = env_or_die("JELLYFIN_TOKEN") qs = "?" + urllib.parse.urlencode(params, safe=",") if params else "" url = JF_BASE + path + qs cmd = ["ssh", NULLSTONE, f"docker exec jellyfin curl -s -H 'X-Emby-Token: {tok}' {shlex.quote(url)}"] return json.loads(subprocess.check_output(cmd, text=True)) def list_episodes(series_id: str) -> list[dict]: d = jellyfin("/Items", { "ParentId": series_id, "IncludeItemTypes": "Episode", "Recursive": "true", "Fields": "Path,ParentIndexNumber,IndexNumber,ProviderIds", "SortBy": "ParentIndexNumber,IndexNumber", }) return d["Items"] def imdb_strip(s: str | None) -> str | None: if not s: return None return s[2:] if s.startswith("tt") else s def os_search_imdb(api_key: str, imdb_no_tt: str) -> tuple[int, int] | None: """Look up the show's primary catalogue (season, episode) by per-ep IMDB id. Uses OS feature_details S/E (which appears to align with what Addic7ed indexes for at least the test shows). Search calls do not consume the daily quota. If the resulting download mismatches expected dialogue, consider re-running with the v2 OS REST path which uses imdb_id directly.""" cmd = ["curl", "-sSf", "-H", f"Api-Key: {api_key}", "-H", f"User-Agent: {USER_AGENT}", f"{OS_BASE}/subtitles?imdb_id={imdb_no_tt}&languages=en&per_page=5"] raw = subprocess.check_output(cmd) j = json.loads(raw.decode()) for h in j.get("data", []): fd = h.get("attributes", {}).get("feature_details", {}) s, e = fd.get("season_number"), fd.get("episode_number") if s and e: return int(s), int(e) return None def episode_to_paths(ep: dict) -> tuple[str, str]: """Return (remote_dir, base_filename) for sidecar placement on nullstone.""" container_path = ep["Path"] host_path = container_path.replace("/media/", "/home/user/media/") return os.path.dirname(host_path), os.path.splitext(os.path.basename(host_path))[0] def addic7ed_safe_name(series: str, year: int | None, fox_s: int, fox_e: int) -> str: """Build filename that subliminal+addic7ed match. Strip '!' (breaks matcher) and other punctuation; keep year if known.""" cleaned = re.sub(r"[!?:]", "", series).replace(" ", ".") yearbit = f".{year}" if year else "" return f"{cleaned}{yearbit}.S{fox_s:02d}E{fox_e:02d}.HDTV.x264.mkv" def write_sidecar_remote(content: bytes, remote_path: str) -> None: p = subprocess.Popen(["ssh", NULLSTONE, f"cat > {shlex.quote(remote_path)}"], stdin=subprocess.PIPE) p.communicate(content) if p.returncode != 0: die(f"failed writing {remote_path}") def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("series_id") ap.add_argument("--season", type=int, default=None) ap.add_argument("--start", type=int, default=1) ap.add_argument("--end", type=int, default=10**6) ap.add_argument("--all", action="store_true") args = ap.parse_args() if args.season is None and not args.all: die("pass --season N or --all") api_key = load_api_key() dry = os.environ.get("DRY_RUN") == "1" eps = list_episodes(args.series_id) work = [] for ep in eps: s, n = ep["ParentIndexNumber"], ep["IndexNumber"] if not args.all and s != args.season: continue if not (args.start <= n <= args.end): continue work.append(ep) if not work: die("no episodes selected") print(f"[plan] {len(work)} episodes selected", file=sys.stderr) ok = 0 fail = [] for ep in work: s, n = ep["ParentIndexNumber"], ep["IndexNumber"] label = f"libS{s:02}E{n:02} {ep['Name']}" imdb = imdb_strip(ep.get("ProviderIds", {}).get("Imdb")) if not imdb: print(f"[skip] {label} — no IMDB id", file=sys.stderr) fail.append((label, "no-imdb")) continue try: fox = os_search_imdb(api_key, imdb) except subprocess.CalledProcessError as e: print(f"[skip] {label} — OS search err {e.returncode}", file=sys.stderr) fail.append((label, "os-search")) continue if fox is None: print(f"[skip] {label} — OS has no S/E for imdb={imdb}", file=sys.stderr) fail.append((label, "no-fox-se")) continue fox_s, fox_e = fox # series name + year — pull from path or item series_name = ep.get("SeriesName") or "Show" year = None ymatch = re.search(r"\((\d{4})\)", ep.get("Path", "")) if ymatch: year = int(ymatch.group(1)) v_name = addic7ed_safe_name(series_name, year, fox_s, fox_e) v = Video.fromname(v_name) try: hits = list_subtitles([v], {Language("eng")}, providers=["addic7ed"]).get(v, []) except Exception as e: print(f"[skip] {label} — addic7ed list err: {type(e).__name__}", file=sys.stderr) fail.append((label, "a7d-list")) continue if not hits: print(f"[skip] {label} — addic7ed 0 subs (foxS{fox_s:02}E{fox_e:02})", file=sys.stderr) fail.append((label, "a7d-no-hits")) continue pick = hits[0] # subliminal returns ordered; take first print(f"[pick] {label} -> foxS{fox_s:02}E{fox_e:02} a7d={pick.id}", file=sys.stderr) if dry: ok += 1 continue try: download_subtitles([pick]) except Exception as e: print(f"[fail] {label} — addic7ed dl err: {type(e).__name__}: {e}", file=sys.stderr) fail.append((label, "a7d-dl")) continue if not pick.content: print(f"[fail] {label} — empty content", file=sys.stderr) fail.append((label, "empty")) continue remote_dir, base = episode_to_paths(ep) dest = f"{remote_dir}/{base}.eng.srt" write_sidecar_remote(pick.content, dest) print(f"[ok] {label} -> {dest}", file=sys.stderr) ok += 1 print(f"\n[done] ok={ok}/{len(work)} failures={len(fail)}", file=sys.stderr) for lab, why in fail: print(f" - {lab}: {why}", file=sys.stderr) if ok: try: subprocess.run([os.path.join(os.path.dirname(__file__), "audit-coverage.py")], check=False) except Exception as e: print(f"[warn] coverage refresh skipped: {e}", file=sys.stderr) return 0 if ok else 2 if __name__ == "__main__": sys.exit(main())