legacy-arrflix/processes/subtitles/lib/sub-a7d-fetch.py
s8n 43f55643be processes/subtitles: v3 Addic7ed fetcher + AD 49/58 subbed
Adds lib/sub-a7d-fetch.py: free, no-daily-cap path via subliminal's
addic7ed provider (anonymous). Uses OpenSubtitles REST search-only (no
quota cost) to translate library S/E to the show's primary catalogue
numbering, then drives subliminal to download from Addic7ed and writes
sidecars direct to nullstone via SSH.

Picker quirks: subliminal series-name matcher is broken by '!' in the
title, so the script strips it before building the synthetic
Video.fromname() string. OS feature_details S/E happens to align with
Addic7ed's indexing for the test show (American Dad).

Recipe README now reflects three paths in cheapest-first order: v3
Addic7ed, v2 OS REST (20/day), v1 plugin. American Dad run log updated
to 49/58 (S01 7/7 v1, S02 16/16 mixed v2/v3, S03 16/19 v3, S04 10/16
v3). 9 misses identified, deferred to next OS REST quota window.
2026-05-09 23:31:10 +01:00

253 lines
8.5 KiB
Python
Executable file

#!/usr/bin/env python3
"""Subtitle fetcher v3 — Addic7ed via subliminal.
Free, no daily quota. Uses OpenSubtitles REST (search-only, no downloads,
no quota burn) to translate library S/E numbering to the show's primary
catalogue numbering (e.g. Hulu→Fox for American Dad), then drives
subliminal's addic7ed provider for the actual download.
Why v3: OS REST `/download` is capped at 20/day on free tier. Addic7ed
serves anonymous downloads with no daily limit. v2 (lib/sub-rest-fetch.py)
remains the right tool when quota isn't the bottleneck — addic7ed has
narrower coverage than OpenSubtitles (English only, mostly).
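Picker: the first subtitle that subliminal's list_subtitles() returns for a
synthetic Video built from the series name, year and remapped S/E; this
script applies no score ranking of its own. For AD, addic7ed catalogues by
Fox airing order, so the script remaps library Hulu numbering via per-ep
IMDB id lookup on OS REST.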

Usage:
    sub-a7d-fetch.py <series-id> --season N [--start E] [--end E]
    sub-a7d-fetch.py <series-id> --all

Env (required):
    JELLYFIN_TOKEN          X-Emby-Token for nullstone Jellyfin
    OPENSUBTITLES_API_KEY   Path to file holding the OS REST key (search only)

Env (optional):
    NULLSTONE               SSH target, default user@192.168.0.100
    DRY_RUN=1               search + remap only, no download
"""
from __future__ import annotations
import argparse
import json
import os
import re
import shlex
import subprocess
import sys
import tempfile
import urllib.parse
from babelfish import Language
from subliminal import (Video, region, list_subtitles, download_subtitles,
save_subtitles)
# Base URL of the OpenSubtitles REST API (this script only issues searches against it).
OS_BASE = "https://api.opensubtitles.com/api/v1"
# Value sent as the User-Agent header on OpenSubtitles requests.
USER_AGENT = "arrflix v1.0.0"
# Jellyfin base URL as reached from inside the jellyfin container
# (requests are issued through `docker exec jellyfin curl ...`).
JF_BASE = "http://localhost:8096"
# SSH target used for Jellyfin queries and sidecar writes; override via $NULLSTONE.
NULLSTONE = os.environ.get("NULLSTONE", "user@192.168.0.100")
# Configure subliminal's cache region with the in-memory dogpile backend at import time.
region.configure("dogpile.cache.memory")
def die(msg: str, code: int = 1) -> None:
    """Print a fatal error to stderr and terminate the process.

    Args:
        msg: Text shown after the ``ERROR: `` prefix.
        code: Process exit status (defaults to 1).

    Raises:
        SystemExit: Always; this function never returns normally.
    """
    text = f"ERROR: {msg}"
    print(text, file=sys.stderr)
    raise SystemExit(code)
def env_or_die(name: str) -> str:
    """Return the value of environment variable *name*, aborting if unset or empty.

    Args:
        name: Environment variable to read.

    Returns:
        The variable's non-empty value.
    """
    value = os.environ.get(name)
    if value:
        return value
    die(f"{name} not set")
    # Only reached if die() returns, which it does not in normal operation.
    return value
def load_api_key() -> str:
    """Read the OpenSubtitles API key from the file named by $OPENSUBTITLES_API_KEY.

    Returns:
        The file's contents with surrounding whitespace removed.
    """
    key_file = env_or_die("OPENSUBTITLES_API_KEY")
    with open(key_file) as handle:
        contents = handle.read()
    return contents.strip()
def jellyfin(path: str, params: dict | None = None) -> dict:
    """GET a Jellyfin API path over SSH and return the decoded JSON body.

    The request is made by running ``curl`` inside the ``jellyfin`` container
    on the NULLSTONE host (``ssh`` -> ``docker exec``).

    Args:
        path: API path appended to JF_BASE, e.g. ``"/Items"``.
        params: Optional query parameters; commas are left unescaped.

    Returns:
        The parsed JSON response.

    Raises:
        subprocess.CalledProcessError: If the ssh command exits non-zero.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    tok = env_or_die("JELLYFIN_TOKEN")
    qs = "?" + urllib.parse.urlencode(params, safe=",") if params else ""
    url = JF_BASE + path + qs
    # ssh hands a single string to the remote shell, so every argument is
    # quoted individually. Previously the token was spliced raw between
    # single quotes, which broke (or injected) on a quote character.
    remote_argv = ["docker", "exec", "jellyfin", "curl", "-s",
                   "-H", f"X-Emby-Token: {tok}", url]
    remote_cmd = " ".join(shlex.quote(arg) for arg in remote_argv)
    raw = subprocess.check_output(["ssh", NULLSTONE, remote_cmd], text=True)
    return json.loads(raw)
def list_episodes(series_id: str) -> list[dict]:
    """Fetch every episode item under a Jellyfin series.

    Args:
        series_id: Jellyfin item id passed as ``ParentId``.

    Returns:
        The ``Items`` list from the response, requested sorted by season then
        episode number.
    """
    query = {
        "ParentId": series_id,
        "IncludeItemTypes": "Episode",
        "Recursive": "true",
        "Fields": "Path,ParentIndexNumber,IndexNumber,ProviderIds",
        "SortBy": "ParentIndexNumber,IndexNumber",
    }
    response = jellyfin("/Items", query)
    return response["Items"]
def imdb_strip(s: str | None) -> str | None:
    """Drop a leading ``tt`` from an IMDB id.

    Args:
        s: IMDB id with or without the ``tt`` prefix, or None/empty.

    Returns:
        The id without its ``tt`` prefix, or None when *s* is None or empty.
    """
    if s and s.startswith("tt"):
        return s[2:]
    # Empty string and None both collapse to None; other values pass through.
    return s or None
def os_search_imdb(api_key: str, imdb_no_tt: str) -> tuple[int, int] | None:
    """Look up the show's primary catalogue (season, episode) by per-ep IMDB id.

    Uses OS feature_details S/E (which appears to align with what Addic7ed
    indexes for at least the test shows). Search calls do not consume the
    daily quota. If the resulting download mismatches expected dialogue,
    consider re-running with the v2 OS REST path which uses imdb_id directly.

    Args:
        api_key: OpenSubtitles REST API key, sent as the ``Api-Key`` header.
        imdb_no_tt: Episode IMDB id without the ``tt`` prefix.

    Returns:
        ``(season, episode)`` from the first result carrying both numbers, or
        None when no result does.

    Raises:
        subprocess.CalledProcessError: If curl exits non-zero (``-f`` makes
            HTTP error statuses count as failures).
        ValueError: If the response body is not valid UTF-8 JSON.
    """
    cmd = ["curl", "-sSf",
           "-H", f"Api-Key: {api_key}",
           "-H", f"User-Agent: {USER_AGENT}",
           f"{OS_BASE}/subtitles?imdb_id={imdb_no_tt}&languages=en&per_page=5"]
    raw = subprocess.check_output(cmd)
    j = json.loads(raw.decode())
    # `or {}` / `or []` guard against JSON nulls: dict.get's default only
    # applies when the key is absent, not when its value is null.
    for h in j.get("data") or []:
        fd = (h.get("attributes") or {}).get("feature_details") or {}
        s, e = fd.get("season_number"), fd.get("episode_number")
        # Truthiness check kept from the original: a missing or zero
        # season/episode is treated as "no usable numbering".
        if s and e:
            return int(s), int(e)
    return None
def episode_to_paths(ep: dict) -> tuple[str, str]:
    """Return (remote_dir, base_filename) for sidecar placement on nullstone.

    Args:
        ep: Jellyfin episode item; its ``Path`` is the container-side path.

    Returns:
        The host-side directory of the media file and the file's name without
        its extension.

    Raises:
        KeyError: If *ep* has no ``Path``.
    """
    container_path = ep["Path"]
    # Translate only the first "/media/" (the container mount point); an
    # unbounded replace would also rewrite any later directory named "media".
    host_path = container_path.replace("/media/", "/home/user/media/", 1)
    remote_dir = os.path.dirname(host_path)
    base, _ext = os.path.splitext(os.path.basename(host_path))
    return remote_dir, base
def addic7ed_safe_name(series: str, year: int | None, fox_s: int, fox_e: int) -> str:
    """Build a synthetic release filename for subliminal's addic7ed lookup.

    The characters ``!``, ``?`` and ``:`` are removed from the series name
    (``!`` is noted as breaking the matcher) and spaces become dots.

    Args:
        series: Series title as stored in the library.
        year: Series year; omitted from the name when falsy.
        fox_s: Season number in the target catalogue.
        fox_e: Episode number in the target catalogue.

    Returns:
        A name of the form ``Title[.Year].SxxEyy.HDTV.x264.mkv``.
    """
    stem = re.sub(r"[!?:]", "", series).replace(" ", ".")
    if year:
        stem = f"{stem}.{year}"
    return f"{stem}.S{fox_s:02d}E{fox_e:02d}.HDTV.x264.mkv"
def write_sidecar_remote(content: bytes, remote_path: str) -> None:
    """Write *content* to *remote_path* on the NULLSTONE host over SSH.

    The bytes are piped into a remote ``cat > <path>``; the path is
    shell-quoted. A non-zero ssh exit status aborts the script via die().

    Args:
        content: Raw subtitle bytes to store.
        remote_path: Absolute destination path on the remote host.
    """
    remote_cmd = f"cat > {shlex.quote(remote_path)}"
    result = subprocess.run(["ssh", NULLSTONE, remote_cmd], input=content)
    if result.returncode != 0:
        die(f"failed writing {remote_path}")
def _select_episodes(eps: list[dict], season: int | None, start: int, end: int,
                     take_all: bool) -> tuple[list[dict], int]:
    """Filter episode items by season / episode range.

    Returns the selected items plus the count of items skipped because they
    carry no season or episode number.
    """
    picked = []
    unnumbered = 0
    for ep in eps:
        s, n = ep.get("ParentIndexNumber"), ep.get("IndexNumber")
        if s is None or n is None:
            # Such items cannot be range-filtered or labelled; indexing them
            # directly would raise KeyError/TypeError and abort the run.
            unnumbered += 1
            continue
        if not take_all and s != season:
            continue
        if start <= n <= end:
            picked.append(ep)
    return picked, unnumbered


def main() -> int:
    """Fetch Addic7ed subtitles for the selected episodes of one series.

    For each selected episode: resolve the catalogue S/E via OpenSubtitles
    search, list Addic7ed subtitles through subliminal, download the first
    hit and write it next to the media file as ``<base>.eng.srt`` over SSH.
    With ``DRY_RUN=1`` the download and write are skipped. Per-episode
    problems are logged to stderr and collected; a summary is printed last.

    Returns:
        0 if at least one episode succeeded, otherwise 2. Fatal setup errors
        and sidecar write failures exit through die() instead.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("series_id")
    ap.add_argument("--season", type=int, default=None)
    ap.add_argument("--start", type=int, default=1)
    ap.add_argument("--end", type=int, default=10**6)
    ap.add_argument("--all", action="store_true")
    args = ap.parse_args()
    if args.season is None and not args.all:
        die("pass --season N or --all")

    api_key = load_api_key()
    dry = os.environ.get("DRY_RUN") == "1"

    eps = list_episodes(args.series_id)
    work, unnumbered = _select_episodes(eps, args.season, args.start, args.end,
                                        args.all)
    if unnumbered:
        print(f"[plan] ignoring {unnumbered} episodes without season/episode numbers",
              file=sys.stderr)
    if not work:
        die("no episodes selected")
    print(f"[plan] {len(work)} episodes selected", file=sys.stderr)

    ok = 0
    fail = []
    for ep in work:
        s, n = ep["ParentIndexNumber"], ep["IndexNumber"]
        label = f"libS{s:02}E{n:02} {ep['Name']}"
        # `or {}` also covers a present-but-null ProviderIds.
        imdb = imdb_strip((ep.get("ProviderIds") or {}).get("Imdb"))
        if not imdb:
            print(f"[skip] {label} — no IMDB id", file=sys.stderr)
            fail.append((label, "no-imdb"))
            continue

        try:
            fox = os_search_imdb(api_key, imdb)
        except subprocess.CalledProcessError as e:
            print(f"[skip] {label} — OS search err {e.returncode}", file=sys.stderr)
            fail.append((label, "os-search"))
            continue
        except ValueError as e:
            # curl succeeded but the body was not decodable JSON; treat it as
            # a per-episode failure instead of aborting the whole batch.
            print(f"[skip] {label} — OS search bad response: {type(e).__name__}",
                  file=sys.stderr)
            fail.append((label, "os-search"))
            continue
        if fox is None:
            print(f"[skip] {label} — OS has no S/E for imdb={imdb}", file=sys.stderr)
            fail.append((label, "no-fox-se"))
            continue
        fox_s, fox_e = fox

        # series name + year — pull from path or item
        series_name = ep.get("SeriesName") or "Show"
        year = None
        ymatch = re.search(r"\((\d{4})\)", ep.get("Path", ""))
        if ymatch:
            year = int(ymatch.group(1))
        v_name = addic7ed_safe_name(series_name, year, fox_s, fox_e)
        v = Video.fromname(v_name)

        # Broad catch is deliberate: any provider error skips this episode only.
        try:
            hits = list_subtitles([v], {Language("eng")},
                                  providers=["addic7ed"]).get(v, [])
        except Exception as e:
            print(f"[skip] {label} — addic7ed list err: {type(e).__name__}",
                  file=sys.stderr)
            fail.append((label, "a7d-list"))
            continue
        if not hits:
            print(f"[skip] {label} — addic7ed 0 subs (foxS{fox_s:02}E{fox_e:02})",
                  file=sys.stderr)
            fail.append((label, "a7d-no-hits"))
            continue

        # NOTE(review): takes the first hit exactly as list_subtitles returned
        # it. The earlier comment assumed best-first ordering; confirm against
        # subliminal or rank explicitly before relying on it.
        pick = hits[0]
        print(f"[pick] {label} -> foxS{fox_s:02}E{fox_e:02} a7d={pick.id}",
              file=sys.stderr)
        if dry:
            ok += 1
            continue

        try:
            download_subtitles([pick])
        except Exception as e:
            print(f"[fail] {label} — addic7ed dl err: {type(e).__name__}: {e}",
                  file=sys.stderr)
            fail.append((label, "a7d-dl"))
            continue
        if not pick.content:
            print(f"[fail] {label} — empty content", file=sys.stderr)
            fail.append((label, "empty"))
            continue

        remote_dir, base = episode_to_paths(ep)
        dest = f"{remote_dir}/{base}.eng.srt"
        write_sidecar_remote(pick.content, dest)
        print(f"[ok] {label} -> {dest}", file=sys.stderr)
        ok += 1

    print(f"\n[done] ok={ok}/{len(work)} failures={len(fail)}", file=sys.stderr)
    for lab, why in fail:
        print(f" - {lab}: {why}", file=sys.stderr)
    return 0 if ok else 2
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())