legacy-arrflix/processes/subtitles/lib/sub-a7d-fetch.py
s8n c6ec208520 processes/subtitles: COVERAGE.md live audit + auto-refresh on fetch
Adds lib/audit-coverage.py: queries Jellyfin live for every series, every
episode, and every movie; classifies each by whether the English subtitle
comes from a sidecar, embedded stream, or doesn't exist; renders a
Markdown report with one-char-per-episode bars for visual scanning. Output
file is processes/subtitles/COVERAGE.md, regenerated on demand.

v2 sub-rest-fetch.py and v3 sub-a7d-fetch.py now invoke the audit at end
of a successful run, so the committed coverage file stays in sync with
library state without manual intervention. v3.5 yt-fetch path skips the
auto-call since it doesn't speak to Jellyfin directly; run audit manually
after copying YT sidecars to nullstone.

README.md surfaces the audit at the top so anyone landing in the recipe
folder sees current state before starting a run.
2026-05-10 02:19:32 +01:00

260 lines
8.8 KiB
Python
Executable file

#!/usr/bin/env python3
"""Subtitle fetcher v3 — Addic7ed via subliminal.
Free, no daily quota. Uses OpenSubtitles REST (search-only, no downloads,
no quota burn) to translate library S/E numbering to the show's primary
catalogue numbering (e.g. Hulu→Fox for American Dad), then drives
subliminal's addic7ed provider for the actual download.
Why v3: OS REST `/download` is capped at 20/day on free tier. Addic7ed
serves anonymous downloads with no daily limit. v2 (lib/sub-rest-fetch.py)
remains the right tool when quota isn't the bottleneck — addic7ed has
narrower coverage than OpenSubtitles (English only, mostly).
Picker: subliminal's own scoring against the matched Video (filename, S/E,
year). For AD, addic7ed catalogues by Fox airing order, so the script
remaps library Hulu numbering via per-ep IMDB id lookup on OS REST.
Usage:
    sub-a7d-fetch.py <series-id> --season N [--start E] [--end E]
    sub-a7d-fetch.py <series-id> --all
Env (required):
    JELLYFIN_TOKEN         X-Emby-Token for nullstone Jellyfin
    OPENSUBTITLES_API_KEY  Path to file holding the OS REST key (search only)
Env (optional):
    NULLSTONE              SSH target, default user@192.168.0.100
    DRY_RUN=1              search + remap only, no download
"""
from __future__ import annotations
import argparse
import json
import os
import re
import shlex
import subprocess
import sys
import tempfile
import urllib.parse
from babelfish import Language
from subliminal import (Video, region, list_subtitles, download_subtitles,
save_subtitles)
# Base URL of the OpenSubtitles REST API; this script only issues search
# requests against it (see os_search_imdb).
OS_BASE = "https://api.opensubtitles.com/api/v1"
# Value sent as the User-Agent header on every OpenSubtitles request.
USER_AGENT = "arrflix v1.0.0"
# Jellyfin base URL as seen by curl running inside the "jellyfin" container
# on the remote host (see jellyfin()), not from the machine running this script.
JF_BASE = "http://localhost:8096"
# SSH target used for every remote command; overridable via $NULLSTONE.
NULLSTONE = os.environ.get("NULLSTONE", "user@192.168.0.100")
# Configure subliminal's cache region with the "dogpile.cache.memory" backend.
# NOTE(review): presumably required before any provider call — confirm against
# the subliminal docs.
region.configure("dogpile.cache.memory")
def die(msg: str, code: int = 1) -> None:
    """Report a fatal error on stderr and terminate the process.

    Args:
        msg: Reason for aborting; printed with an ``ERROR: `` prefix.
        code: Process exit status, 1 unless the caller says otherwise.

    Raises:
        SystemExit: Always, carrying ``code``.
    """
    print(f"ERROR: {msg}", file=sys.stderr)
    raise SystemExit(code)
def env_or_die(name: str) -> str:
    """Fetch a mandatory environment variable.

    An unset variable and one set to the empty string are handled the same
    way: the script aborts through ``die`` with ``"<name> not set"``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's non-empty value.
    """
    value = os.environ.get(name)
    if value:
        return value
    die(f"{name} not set")
    return value
def load_api_key() -> str:
    """Read the OpenSubtitles API key from the file named by the environment.

    ``OPENSUBTITLES_API_KEY`` holds the *path* of a file containing the key,
    not the key itself. Surrounding whitespace (including the trailing
    newline most editors add) is stripped.

    Returns:
        The non-empty API key.

    Exits through ``die`` when the variable is unset, the file cannot be
    read, or the file contains only whitespace — an empty key would otherwise
    make every later search fail with an unhelpful curl error.
    """
    path = env_or_die("OPENSUBTITLES_API_KEY")
    try:
        # Explicit encoding so the result does not depend on the locale.
        with open(path, encoding="utf-8") as f:
            key = f.read().strip()
    except (OSError, UnicodeDecodeError) as e:
        die(f"cannot read OPENSUBTITLES_API_KEY file {path}: {e}")
    if not key:
        die(f"OPENSUBTITLES_API_KEY file {path} is empty")
    return key
def jellyfin(path: str, params: dict | None = None) -> dict:
    """GET a Jellyfin API endpoint and return the decoded JSON body.

    The request is not made from this machine: it runs ``curl`` inside the
    ``jellyfin`` container on the ``NULLSTONE`` host, reached over ssh, so
    ``JF_BASE`` is resolved from inside that container.

    Args:
        path: API path beginning with ``/`` (appended to ``JF_BASE``).
        params: Optional query parameters; commas are left unescaped so
            comma-separated list values are passed through as written.

    Returns:
        The parsed JSON response.

    Raises:
        subprocess.CalledProcessError: ssh/docker/curl exited non-zero,
            including HTTP error statuses (curl runs with ``-f``).
        json.JSONDecodeError: the response body was not valid JSON.

    Exits through ``env_or_die`` if ``JELLYFIN_TOKEN`` is not set.
    """
    tok = env_or_die("JELLYFIN_TOKEN")
    qs = "?" + urllib.parse.urlencode(params, safe=",") if params else ""
    url = JF_BASE + path + qs
    # ssh hands the command string to the remote shell, so every
    # variable piece must be shell-quoted — the token included.
    header = shlex.quote(f"X-Emby-Token: {tok}")
    # -f turns HTTP errors into a non-zero exit instead of an error body that
    # would fail later in json.loads; -S keeps curl's error message visible.
    remote = f"docker exec jellyfin curl -sSf -H {header} {shlex.quote(url)}"
    cmd = ["ssh", NULLSTONE, remote]
    return json.loads(subprocess.check_output(cmd, text=True))
def list_episodes(series_id: str) -> list[dict]:
    """Return every episode item Jellyfin reports under *series_id*.

    The query is recursive, restricted to items of type ``Episode``, sorted by
    season then episode number, and asks for the extra fields the rest of the
    script reads (path, season/episode numbers, provider ids).

    Args:
        series_id: Jellyfin item id of the series.

    Returns:
        The ``Items`` list from the Jellyfin response.
    """
    query = {
        "ParentId": series_id,
        "IncludeItemTypes": "Episode",
        "Recursive": "true",
        "Fields": "Path,ParentIndexNumber,IndexNumber,ProviderIds",
        "SortBy": "ParentIndexNumber,IndexNumber",
    }
    response = jellyfin("/Items", query)
    return response["Items"]
def imdb_strip(s: str | None) -> str | None:
    """Drop the leading ``tt`` from an IMDB id.

    Args:
        s: IMDB id with or without the ``tt`` prefix, or None.

    Returns:
        The id without its ``tt`` prefix; ids that lack the prefix come back
        unchanged. None or an empty string yields None.
    """
    if s and s.startswith("tt"):
        return s[2:]
    return s if s else None
def os_search_imdb(api_key: str, imdb_no_tt: str) -> tuple[int, int] | None:
    """Map a per-episode IMDB id to the show's primary (season, episode).

    Runs an OpenSubtitles subtitle *search* (English, five results) through
    curl and returns the season/episode numbers from the ``feature_details``
    of the first result that carries both. Per the module notes, searches do
    not count against the daily download quota, and this numbering appears to
    line up with what Addic7ed indexes for the shows tested so far. If a
    downloaded subtitle turns out not to match the dialogue, fall back to the
    v2 OS REST path, which uses the imdb id directly.

    Args:
        api_key: OpenSubtitles REST API key.
        imdb_no_tt: IMDB id of the episode without the ``tt`` prefix.

    Returns:
        ``(season, episode)`` as ints, or None when no result has both
        numbers. A value of 0 for either number is treated as missing.

    Raises:
        subprocess.CalledProcessError: curl exited non-zero (``-f`` makes
            HTTP error statuses count).
        ValueError: the response was not valid JSON.
    """
    url = f"{OS_BASE}/subtitles?imdb_id={imdb_no_tt}&languages=en&per_page=5"
    headers = (f"Api-Key: {api_key}", f"User-Agent: {USER_AGENT}")
    cmd = ["curl", "-sSf"]
    for header in headers:
        cmd += ["-H", header]
    cmd.append(url)
    payload = json.loads(subprocess.check_output(cmd).decode())
    for hit in payload.get("data", []):
        details = hit.get("attributes", {}).get("feature_details", {})
        season = details.get("season_number")
        episode = details.get("episode_number")
        if season and episode:
            return int(season), int(episode)
    return None
def episode_to_paths(ep: dict) -> tuple[str, str]:
    """Return (remote_dir, base_filename) for sidecar placement on nullstone.

    Jellyfin reports the path as seen inside its container; the sidecar is
    written on the host, so the first ``/media/`` component is rewritten to
    ``/home/user/media/``.

    Args:
        ep: Jellyfin episode item; must contain ``Path``.

    Returns:
        The host directory holding the video, and the video's file name
        without its extension.

    Raises:
        KeyError: ``ep`` has no ``Path``.
    """
    container_path = ep["Path"]
    # count=1: only the mount prefix is translated. Without it a show or
    # season folder that itself contains "/media/" would be rewritten too,
    # producing a host path that does not exist.
    host_path = container_path.replace("/media/", "/home/user/media/", 1)
    remote_dir = os.path.dirname(host_path)
    base, _ext = os.path.splitext(os.path.basename(host_path))
    return remote_dir, base
def addic7ed_safe_name(series: str, year: int | None, fox_s: int, fox_e: int) -> str:
    """Build a synthetic release file name for subliminal to parse.

    ``!``, ``?`` and ``:`` are removed from the series title ('!' is known to
    break the matcher) and each space becomes a dot. The year is inserted
    after the title when one is given; season and episode are zero-padded to
    at least two digits, followed by a fixed ``HDTV.x264.mkv`` tail.

    Args:
        series: Series title as shown in the library.
        year: Premiere year, or None/0 to leave it out.
        fox_s: Season number in the target catalogue's numbering.
        fox_e: Episode number in the target catalogue's numbering.

    Returns:
        A name such as ``American.Dad.2005.S01E02.HDTV.x264.mkv``.
    """
    without_punct = re.sub(r"[!?:]", "", series)
    cleaned = ".".join(without_punct.split(" "))
    yearbit = "" if not year else f".{year}"
    return f"{cleaned}{yearbit}.S{fox_s:02d}E{fox_e:02d}.HDTV.x264.mkv"
def write_sidecar_remote(content: bytes, remote_path: str) -> None:
    """Write *content* to *remote_path* on the ``NULLSTONE`` host.

    The bytes are piped over ssh into ``cat > <path>``; the path is
    shell-quoted because the remote shell interprets the command.

    Args:
        content: Raw subtitle bytes to store.
        remote_path: Absolute destination path on the remote host.

    Exits through ``die`` if the ssh command returns a non-zero status.
    """
    remote_cmd = f"cat > {shlex.quote(remote_path)}"
    finished = subprocess.run(["ssh", NULLSTONE, remote_cmd], input=content)
    if finished.returncode != 0:
        die(f"failed writing {remote_path}")
def _select_episodes(eps: list[dict], season: int | None, start: int,
                     end: int, take_all: bool) -> list[dict]:
    """Filter Jellyfin episode items down to the requested season and range."""
    selected = []
    for ep in eps:
        s, n = ep.get("ParentIndexNumber"), ep.get("IndexNumber")
        if s is None or n is None:
            # Items without season/episode numbers cannot be remapped or
            # labelled; skip them rather than crash the whole run.
            print(f"[skip] {ep.get('Name', '?')} — no season/episode number",
                  file=sys.stderr)
            continue
        if not take_all and s != season:
            continue
        if not (start <= n <= end):
            continue
        selected.append(ep)
    return selected


def main() -> int:
    """Fetch English sidecar subtitles from Addic7ed for the selected episodes.

    For each selected episode: look up the catalogue season/episode through
    OpenSubtitles search, build a synthetic file name, list Addic7ed
    subtitles through subliminal, download the first hit and write it next to
    the video on the remote host as ``<video-basename>.eng.srt``. With
    ``DRY_RUN=1`` the download and write are skipped.

    A per-episode problem is recorded and the loop moves on; the summary and
    the list of failures go to stderr. After a real (non-dry) run with at
    least one success, ``audit-coverage.py`` from this script's directory is
    invoked on a best-effort basis.

    Returns:
        0 if at least one episode succeeded (or was picked, in a dry run),
        2 otherwise. Fatal setup errors exit with status 1 through ``die``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("series_id")
    ap.add_argument("--season", type=int, default=None)
    ap.add_argument("--start", type=int, default=1)
    ap.add_argument("--end", type=int, default=10**6)
    ap.add_argument("--all", action="store_true")
    args = ap.parse_args()
    if args.season is None and not args.all:
        die("pass --season N or --all")

    api_key = load_api_key()
    dry = os.environ.get("DRY_RUN") == "1"
    eps = list_episodes(args.series_id)
    work = _select_episodes(eps, args.season, args.start, args.end, args.all)
    if not work:
        die("no episodes selected")
    print(f"[plan] {len(work)} episodes selected", file=sys.stderr)

    ok = 0
    fail = []
    for ep in work:
        s, n = ep["ParentIndexNumber"], ep["IndexNumber"]
        label = f"libS{s:02}E{n:02} {ep.get('Name', '?')}"
        imdb = imdb_strip(ep.get("ProviderIds", {}).get("Imdb"))
        if not imdb:
            print(f"[skip] {label} — no IMDB id", file=sys.stderr)
            fail.append((label, "no-imdb"))
            continue
        try:
            fox = os_search_imdb(api_key, imdb)
        except subprocess.CalledProcessError as e:
            print(f"[skip] {label} — OS search err {e.returncode}", file=sys.stderr)
            fail.append((label, "os-search"))
            continue
        except ValueError as e:
            # Malformed JSON (JSONDecodeError) or non-numeric S/E: a bad
            # response for one episode must not abort the rest of the batch.
            print(f"[skip] {label} — OS search bad response: {e}", file=sys.stderr)
            fail.append((label, "os-search"))
            continue
        if fox is None:
            print(f"[skip] {label} — OS has no S/E for imdb={imdb}", file=sys.stderr)
            fail.append((label, "no-fox-se"))
            continue
        fox_s, fox_e = fox

        # Series name comes from the item; the year is taken from a
        # "(YYYY)" component in the library path when there is one.
        series_name = ep.get("SeriesName") or "Show"
        year = None
        ymatch = re.search(r"\((\d{4})\)", ep.get("Path", ""))
        if ymatch:
            year = int(ymatch.group(1))
        v_name = addic7ed_safe_name(series_name, year, fox_s, fox_e)
        v = Video.fromname(v_name)

        try:
            hits = list_subtitles([v], {Language("eng")},
                                  providers=["addic7ed"]).get(v, [])
        except Exception as e:
            print(f"[skip] {label} — addic7ed list err: {type(e).__name__}",
                  file=sys.stderr)
            fail.append((label, "a7d-list"))
            continue
        if not hits:
            print(f"[skip] {label} — addic7ed 0 subs (foxS{fox_s:02}E{fox_e:02})",
                  file=sys.stderr)
            fail.append((label, "a7d-no-hits"))
            continue
        # NOTE(review): this takes the first listed subtitle on the assumption
        # that list_subtitles returns results best-first — confirm against the
        # subliminal docs; if it does not, rank with subliminal's scoring.
        pick = hits[0]
        print(f"[pick] {label} -> foxS{fox_s:02}E{fox_e:02} a7d={pick.id}",
              file=sys.stderr)
        if dry:
            ok += 1
            continue

        try:
            download_subtitles([pick])
        except Exception as e:
            print(f"[fail] {label} — addic7ed dl err: {type(e).__name__}: {e}",
                  file=sys.stderr)
            fail.append((label, "a7d-dl"))
            continue
        if not pick.content:
            print(f"[fail] {label} — empty content", file=sys.stderr)
            fail.append((label, "empty"))
            continue
        remote_dir, base = episode_to_paths(ep)
        dest = f"{remote_dir}/{base}.eng.srt"
        write_sidecar_remote(pick.content, dest)
        print(f"[ok] {label} -> {dest}", file=sys.stderr)
        ok += 1

    print(f"\n[done] ok={ok}/{len(work)} failures={len(fail)}", file=sys.stderr)
    for lab, why in fail:
        print(f" - {lab}: {why}", file=sys.stderr)

    # Refresh the coverage report only when sidecars were actually written;
    # a dry run changes nothing in the library, so the audit would be wasted.
    if ok and not dry:
        try:
            subprocess.run([os.path.join(os.path.dirname(__file__),
                                         "audit-coverage.py")],
                           check=False)
        except Exception as e:
            # Best-effort: a missing or non-executable audit script must not
            # turn a successful fetch into a failure.
            print(f"[warn] coverage refresh skipped: {e}", file=sys.stderr)
    return 0 if ok else 2


if __name__ == "__main__":
    sys.exit(main())