legacy-arrflix/playbooks/subtitles/lib/sub-a7d-fetch.py
s8n 24a9497e7d playbooks/ rename + import-media v1.0 + lilo&stitch run
processes/ -> playbooks/ (git mv preserves history; updated cross-refs
in ROADMAP, README, subtitles playbook + scripts).

playbooks/import-media/README.md v1.0 — 7-step import workflow:
  stage on onyx -> rsync to nullstone -> chmod -> verify scan ->
  Items/Counts bump -> optional subtitle pass -> run-log
Cross-references docs/05/07/08, ADMIN-GUIDE, README. Mirrors the
existing subtitles playbook structure (CHANGELOG + runs/_template).

CHANGELOG v1.0 lists known gaps (bin/cleanup-import.sh and
bin/normalize.py still doc-only, ROADMAP M6).

First run logged: playbooks/import-media/runs/lilo-stitch-2002.md.
Lilo & Stitch (2002) imported to /home/user/media/movies/, item
c2f4aff133c1b9631500fadf293b0b2f, TMDb 11544, MovieCount 3 -> 4.
LibraryMonitor didn't auto-fire — needed manual /Library/Refresh;
playbook updated to make this an unconditional step.

Source: 1080p BluRay HEVC 10-bit / EAC3 5.1 / 2x PGS embedded subs.
Per quality bar (README.md:41) — passes.
2026-05-10 02:29:57 +01:00

260 lines
8.8 KiB
Python
Executable file

#!/usr/bin/env python3
"""Subtitle fetcher v3 — Addic7ed via subliminal.
Free, no daily quota. Uses OpenSubtitles REST (search-only, no downloads,
no quota burn) to translate library S/E numbering to the show's primary
catalogue numbering (e.g. Hulu→Fox for American Dad), then drives
subliminal's addic7ed provider for the actual download.
Why v3: OS REST `/download` is capped at 20/day on free tier. Addic7ed
serves anonymous downloads with no daily limit. v2 (lib/sub-rest-fetch.py)
remains the right tool when quota isn't the bottleneck — addic7ed has
narrower coverage than OpenSubtitles (English only, mostly).
Picker: subliminal's own scoring against the matched Video (filename, S/E,
year). For AD, addic7ed catalogues by Fox airing order, so the script
remaps library Hulu numbering via per-ep IMDB id lookup on OS REST.
Usage:
sub-a7d-fetch.py <series-id> --season N [--start E] [--end E]
sub-a7d-fetch.py <series-id> --all
Env (required):
JELLYFIN_TOKEN X-Emby-Token for nullstone Jellyfin
OPENSUBTITLES_API_KEY Path to file holding the OS REST key (search only)
Env (optional):
NULLSTONE SSH target, default user@192.168.0.100
DRY_RUN=1 search + remap only, no download
"""
from __future__ import annotations
import argparse
import json
import os
import re
import shlex
import subprocess
import sys
import tempfile
import urllib.parse
from babelfish import Language
from subliminal import (Video, region, list_subtitles, download_subtitles,
save_subtitles)
# Base URL of the OpenSubtitles REST API; os_search_imdb() appends the
# /subtitles search path to it.
OS_BASE = "https://api.opensubtitles.com/api/v1"
# Sent as the User-Agent header on every OpenSubtitles request.
USER_AGENT = "arrflix v1.0.0"
# Jellyfin base URL. jellyfin() curls it from inside the `jellyfin` container
# (ssh -> docker exec), so "localhost" is resolved there, not on this machine.
JF_BASE = "http://localhost:8096"
# SSH target for the media host; overridable through the NULLSTONE env var.
NULLSTONE = os.environ.get("NULLSTONE", "user@192.168.0.100")
# NOTE(review): presumably selects an in-process memory backend for
# subliminal's cache region so nothing is persisted between runs — confirm.
region.configure("dogpile.cache.memory")
def die(msg: str, code: int = 1) -> None:
    """Write ``ERROR: <msg>`` to stderr and terminate with exit status ``code``.

    Never returns: the process exits via SystemExit.
    """
    sys.stderr.write(f"ERROR: {msg}\n")
    raise SystemExit(code)
def env_or_die(name: str) -> str:
    """Return the value of environment variable ``name``.

    An unset or empty variable is fatal: die() is called with
    ``"<name> not set"``.
    """
    value = os.environ.get(name)
    if value:
        return value
    die(f"{name} not set")
    return value
def load_api_key() -> str:
    """Read the OpenSubtitles REST key from the file named by OPENSUBTITLES_API_KEY.

    The environment variable holds a *path*, not the key itself. Surrounding
    whitespace (including the trailing newline) is stripped from the file
    contents. Exits through die() when the variable is unset, the file cannot
    be read, or the file holds no key — an empty key would otherwise surface
    later as one failed search per episode.
    """
    path = env_or_die("OPENSUBTITLES_API_KEY")
    try:
        # Explicit encoding: do not let the locale default decide how the
        # key file is decoded.
        with open(path, encoding="utf-8") as f:
            key = f.read().strip()
    except OSError as e:
        die(f"cannot read OPENSUBTITLES_API_KEY file {path}: {e.strerror or e}")
    if not key:
        die(f"OPENSUBTITLES_API_KEY file {path} is empty")
    return key
def jellyfin(path: str, params: dict | None = None) -> dict:
    """GET ``path`` from the Jellyfin API and return the decoded JSON body.

    The request is not made from this machine: it runs ``curl`` inside the
    ``jellyfin`` container on NULLSTONE (ssh -> docker exec), authenticated
    with the JELLYFIN_TOKEN environment variable.

    ``params`` are URL-encoded into the query string; commas are left
    unescaped so comma-separated list values stay literal.

    Raises subprocess.CalledProcessError when ssh/docker/curl fails (curl is
    run with -f, so HTTP error statuses count as failures) and
    json.JSONDecodeError when the body is not JSON.
    """
    tok = env_or_die("JELLYFIN_TOKEN")
    qs = "?" + urllib.parse.urlencode(params, safe=",") if params else ""
    url = JF_BASE + path + qs
    # ssh hands the command to the remote shell as a single string, so every
    # argument — the token header included — is quoted individually.
    remote_argv = [
        "docker", "exec", "jellyfin",
        "curl", "-sSf", "-H", f"X-Emby-Token: {tok}", url,
    ]
    remote_cmd = " ".join(shlex.quote(arg) for arg in remote_argv)
    out = subprocess.check_output(["ssh", NULLSTONE, remote_cmd], text=True)
    return json.loads(out)
def list_episodes(series_id: str) -> list[dict]:
    """Return every Episode item under ``series_id``, as Jellyfin reports them.

    The query recurses below the series, asks for the path, season/episode
    index and provider-id fields, and sorts by season then episode.
    """
    query = dict(
        ParentId=series_id,
        IncludeItemTypes="Episode",
        Recursive="true",
        Fields="Path,ParentIndexNumber,IndexNumber,ProviderIds",
        SortBy="ParentIndexNumber,IndexNumber",
    )
    response = jellyfin("/Items", query)
    return response["Items"]
def imdb_strip(s: str | None) -> str | None:
    """Drop a leading ``tt`` from an IMDB id.

    Returns None for a missing or empty id; ids without the prefix are
    returned unchanged.
    """
    if s and s.startswith("tt"):
        return s[2:]
    return s or None
def os_search_imdb(api_key: str, imdb_no_tt: str) -> tuple[int, int] | None:
    """Look up the show's primary catalogue (season, episode) by per-ep IMDB id.

    Uses OS feature_details S/E (which appears to align with what Addic7ed
    indexes for at least the test shows). Search calls do not consume the
    daily quota. If the resulting download mismatches expected dialogue,
    consider re-running with the v2 OS REST path which uses imdb_id directly.

    ``imdb_no_tt`` is the IMDB id without its ``tt`` prefix. Returns the
    first (season, episode) pair found among up to five English results, or
    None when no result carries both numbers.

    Raises subprocess.CalledProcessError when curl fails (it runs with -f, so
    HTTP error statuses count) and ValueError when the body is not JSON or
    the numbers are not integers.
    """
    query = urllib.parse.urlencode(
        {"imdb_id": imdb_no_tt, "languages": "en", "per_page": 5})
    cmd = ["curl", "-sSf",
           "-H", f"Api-Key: {api_key}",
           "-H", f"User-Agent: {USER_AGENT}",
           f"{OS_BASE}/subtitles?{query}"]
    raw = subprocess.check_output(cmd)
    payload = json.loads(raw.decode())
    # `or {}` / `or []`: a JSON null at any level must read as "absent"
    # rather than blow up on None.get().
    for hit in payload.get("data") or []:
        fd = (hit.get("attributes") or {}).get("feature_details") or {}
        s, e = fd.get("season_number"), fd.get("episode_number")
        # Truthiness check kept from the original: a 0 season/episode is
        # treated the same as a missing one.
        if s and e:
            return int(s), int(e)
    return None
def episode_to_paths(ep: dict) -> tuple[str, str]:
    """Return (remote_dir, base_filename) for sidecar placement on nullstone.

    ``ep["Path"]`` is the path as the Jellyfin container reports it; its
    ``/media/`` mount segment is rewritten to ``/home/user/media/``.
    ``base_filename`` is the media file's name without its extension.
    """
    container_path = ep["Path"]
    # count=1: only the mount segment is rewritten. A later directory that
    # happens to be called "media" must be left untouched.
    host_path = container_path.replace("/media/", "/home/user/media/", 1)
    remote_dir, filename = os.path.split(host_path)
    return remote_dir, os.path.splitext(filename)[0]
def addic7ed_safe_name(series: str, year: int | None, fox_s: int, fox_e: int) -> str:
    """Build the synthetic release filename handed to subliminal for matching.

    '!', '?' and ':' are removed from the series name ('!' breaks the
    matcher) and spaces become dots. The year is included only when truthy.
    Season and episode are zero-padded to two digits, followed by a fixed
    ``HDTV.x264.mkv`` tail.
    """
    without_punct = re.sub(r"[!?:]", "", series)
    pieces = [".".join(without_punct.split(" "))]
    if year:
        pieces.append(str(year))
    pieces.append(f"S{fox_s:02d}E{fox_e:02d}")
    pieces.append("HDTV.x264.mkv")
    return ".".join(pieces)
def write_sidecar_remote(content: bytes, remote_path: str) -> None:
    """Stream ``content`` over ssh into ``remote_path`` on NULLSTONE.

    The remote side runs ``cat > <path>`` with the path shell-quoted. A
    non-zero exit status from ssh is fatal and ends the process via die().
    """
    remote_cmd = f"cat > {shlex.quote(remote_path)}"
    finished = subprocess.run(["ssh", NULLSTONE, remote_cmd], input=content)
    if finished.returncode != 0:
        die(f"failed writing {remote_path}")
def _select_episodes(eps: list[dict], season: int | None, start: int,
                     end: int, take_all: bool) -> list[dict]:
    """Return the episodes inside the requested season / episode window.

    Items lacking a season or episode number are reported and dropped: they
    cannot be range-checked and the progress label needs both numbers.
    """
    selected = []
    for ep in eps:
        s, n = ep.get("ParentIndexNumber"), ep.get("IndexNumber")
        if s is None or n is None:
            print(f"[skip] {ep.get('Name', '?')} — no season/episode number",
                  file=sys.stderr)
            continue
        if not take_all and s != season:
            continue
        if start <= n <= end:
            selected.append(ep)
    return selected


def _fetch_episode(ep: dict, label: str, api_key: str, dry: bool) -> str | None:
    """Remap, search, download and store one episode's subtitle.

    Returns None on success (or on a successful dry-run pick), otherwise a
    short failure tag for the end-of-run summary. Progress goes to stderr.
    """
    # Function-scope import: the module-level subliminal import list does not
    # include the scorer.
    from subliminal.score import compute_score

    imdb = imdb_strip((ep.get("ProviderIds") or {}).get("Imdb"))
    if not imdb:
        print(f"[skip] {label} — no IMDB id", file=sys.stderr)
        return "no-imdb"
    try:
        fox = os_search_imdb(api_key, imdb)
    except subprocess.CalledProcessError as e:
        print(f"[skip] {label} — OS search err {e.returncode}", file=sys.stderr)
        return "os-search"
    except ValueError as e:
        # Non-JSON body or non-numeric S/E: skip this episode instead of
        # aborting the whole batch.
        print(f"[skip] {label} — OS search bad response: {e}", file=sys.stderr)
        return "os-search"
    if fox is None:
        print(f"[skip] {label} — OS has no S/E for imdb={imdb}", file=sys.stderr)
        return "no-fox-se"
    fox_s, fox_e = fox

    # series name from the item; year from a "(YYYY)" component of the path
    series_name = ep.get("SeriesName") or "Show"
    ymatch = re.search(r"\((\d{4})\)", ep.get("Path") or "")
    year = int(ymatch.group(1)) if ymatch else None
    v = Video.fromname(addic7ed_safe_name(series_name, year, fox_s, fox_e))
    try:
        hits = list_subtitles([v], {Language("eng")},
                              providers=["addic7ed"]).get(v, [])
    except Exception as e:
        # Provider boundary: any provider/network error skips the episode.
        print(f"[skip] {label} — addic7ed list err: {type(e).__name__}",
              file=sys.stderr)
        return "a7d-list"
    if not hits:
        print(f"[skip] {label} — addic7ed 0 subs (foxS{fox_s:02}E{fox_e:02})",
              file=sys.stderr)
        return "a7d-no-hits"

    # list_subtitles() does not rank its results, so score each candidate
    # against the synthetic Video. max() keeps the earliest hit on a tie.
    pick = max(hits, key=lambda sub: compute_score(sub, v))
    print(f"[pick] {label} -> foxS{fox_s:02}E{fox_e:02} a7d={pick.id}",
          file=sys.stderr)
    if dry:
        return None

    if not ep.get("Path"):
        # Without a library path there is nowhere to put the sidecar; find
        # out before spending a download.
        print(f"[fail] {label} — item has no Path", file=sys.stderr)
        return "no-path"
    try:
        download_subtitles([pick])
    except Exception as e:
        print(f"[fail] {label} — addic7ed dl err: {type(e).__name__}: {e}",
              file=sys.stderr)
        return "a7d-dl"
    if not pick.content:
        print(f"[fail] {label} — empty content", file=sys.stderr)
        return "empty"
    remote_dir, base = episode_to_paths(ep)
    dest = f"{remote_dir}/{base}.eng.srt"
    # write_sidecar_remote() exits the process itself if the remote write fails.
    write_sidecar_remote(pick.content, dest)
    print(f"[ok] {label} -> {dest}", file=sys.stderr)
    return None


def main() -> int:
    """Command-line entry point: fetch Addic7ed subtitles for a Jellyfin series.

    Parses the arguments, selects the requested episodes, processes each one
    and prints a summary to stderr. When at least one episode succeeded the
    sibling ``audit-coverage.py`` script is run (best effort).

    Returns 0 if at least one episode succeeded, 2 otherwise. Bad arguments
    or an empty selection exit through die().
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("series_id")
    ap.add_argument("--season", type=int, default=None)
    ap.add_argument("--start", type=int, default=1)
    ap.add_argument("--end", type=int, default=10**6)
    ap.add_argument("--all", action="store_true")
    args = ap.parse_args()
    if args.season is None and not args.all:
        die("pass --season N or --all")

    api_key = load_api_key()
    dry = os.environ.get("DRY_RUN") == "1"
    work = _select_episodes(list_episodes(args.series_id), args.season,
                            args.start, args.end, args.all)
    if not work:
        die("no episodes selected")
    print(f"[plan] {len(work)} episodes selected", file=sys.stderr)

    ok = 0
    fail = []
    for ep in work:
        s, n = ep["ParentIndexNumber"], ep["IndexNumber"]
        label = f"libS{s:02}E{n:02} {ep.get('Name', '?')}"
        why = _fetch_episode(ep, label, api_key, dry)
        if why is None:
            ok += 1
        else:
            fail.append((label, why))

    print(f"\n[done] ok={ok}/{len(work)} failures={len(fail)}", file=sys.stderr)
    for lab, why in fail:
        print(f"  - {lab}: {why}", file=sys.stderr)
    if ok:
        # Best effort: a missing or non-executable audit script must not turn
        # a successful fetch run into a failure.
        audit = os.path.join(os.path.dirname(__file__), "audit-coverage.py")
        try:
            subprocess.run([audit], check=False)
        except OSError as e:
            print(f"[warn] coverage refresh skipped: {e}", file=sys.stderr)
    return 0 if ok else 2
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())