From c6ec208520c2b72d4286f187fe0a938604ae0d94 Mon Sep 17 00:00:00 2001 From: s8n Date: Sun, 10 May 2026 02:19:32 +0100 Subject: [PATCH] processes/subtitles: COVERAGE.md live audit + auto-refresh on fetch Adds lib/audit-coverage.py: queries Jellyfin live for every series, every episode, and every movie; classifies each by whether the English subtitle comes from a sidecar, embedded stream, or doesn't exist; renders a Markdown report with one-char-per-episode bars for visual scanning. Output file is processes/subtitles/COVERAGE.md, regenerated on demand. v2 sub-rest-fetch.py and v3 sub-a7d-fetch.py now invoke the audit at end of a successful run, so the committed coverage file stays in sync with library state without manual intervention. v3.5 yt-fetch path skips the auto-call since it doesn't speak to Jellyfin directly; run audit manually after copying YT sidecars to nullstone. README.md surfaces the audit at the top so anyone landing in the recipe folder sees current state before starting a run. 
--- processes/subtitles/COVERAGE.md | 75 +++++++ processes/subtitles/README.md | 9 + processes/subtitles/lib/audit-coverage.py | 240 ++++++++++++++++++++++ processes/subtitles/lib/sub-a7d-fetch.py | 7 + processes/subtitles/lib/sub-rest-fetch.py | 7 + 5 files changed, 338 insertions(+) create mode 100644 processes/subtitles/COVERAGE.md create mode 100755 processes/subtitles/lib/audit-coverage.py diff --git a/processes/subtitles/COVERAGE.md b/processes/subtitles/COVERAGE.md new file mode 100644 index 0000000..f84c787 --- /dev/null +++ b/processes/subtitles/COVERAGE.md @@ -0,0 +1,75 @@ +# ARRFLIX subtitle coverage + +_Generated 2026-05-10 01:18 UTC by `processes/subtitles/lib/audit-coverage.py`._ +_Re-run: `JELLYFIN_TOKEN= processes/subtitles/lib/audit-coverage.py`._ + +Legend: `█` eng sidecar · `▒` eng embedded only · `▓` other-lang embedded · `·` none + +## TV shows + +``` +Show Eps sc emb none Status +────────────────────────────────────────────────────────────────────────────── +American Dad! 
58 49 0 9 PARTIAL (84%) + ██████████████████████████·████████·█████······███ + ██·█████ + +Futurama 72 0 72 0 OK-EMBED (no sidecars) + ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ + ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ + +Obi-Wan Kenobi 6 0 6 0 OK-EMBED (no sidecars) + ▒▒▒▒▒▒ + +Rick and Morty 11 0 11 0 OK-EMBED (no sidecars) + ▒▒▒▒▒▒▒▒▒▒▒ + +Sassy the Sasquatch 5 5 0 0 OK (100%) + █████ + +Star Wars: Maul - Shadow Lord 10 0 10 0 OK-EMBED (no sidecars) + ▒▒▒▒▒▒▒▒▒▒ + +The Big Lez Saga (2022) 3 0 0 3 NEEDS SUBS + ··· + +The Donny & Clarence Show (2024) 5 0 0 5 NEEDS SUBS + ····· + +The Mandalorian 24 0 24 0 OK-EMBED (no sidecars) + ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ + +The Mike Nolan Show 3 0 0 3 NEEDS SUBS + ··· + +``` + +## Movies + +``` +Title sc emb Status +────────────────────────────────────────────────────────────────────────────── +Idiocracy 0 2 OK (embedded) +The Dark Knight 0 1 OK (embedded) +The Incredible Hulk 0 1 OK (embedded) +``` + +## Aggregate + +| Metric | Count | % | +|---|---:|---:| +| Episodes total | 197 | — | +| eng sidecar | 54 | 27% | +| eng embedded only | 123 | 62% | +| other-lang embedded only | 0 | 0% | +| no subs anywhere | 20 | 10% | +| Movies total | 3 | — | +| Movies with any eng sub | 3 | 100% | + +## Status meanings + +- **OK** — every episode has an external `.eng.srt` sidecar (STYLE.md happy path) +- **OK-EMBED** — all eps playable in English but no sidecars; `SaveSubtitlesWithMedia` won't trigger fetch since Jellyfin sees an eng track already +- **PARTIAL (X %)** — some sidecars, some gaps +- **NEEDS SUBS** — zero subs of any language; v3 / v3.5 / v4 fetch required +- **OTHER-LANG ONLY** (movies) — embedded subs exist but none in English diff --git a/processes/subtitles/README.md b/processes/subtitles/README.md index e738eaa..f590b1a 100644 --- a/processes/subtitles/README.md +++ b/processes/subtitles/README.md @@ -8,6 +8,15 @@ command, what to verify, and what to do on failure. 
Background reference for how Jellyfin and the OpenSubtitles plugin work together lives in [`docs/03-subtitles.md`](../../docs/03-subtitles.md). +> **Current state:** [`COVERAGE.md`](COVERAGE.md) is the live audit +> (per-show + per-movie). Regenerate at any time: +> +> ```bash +> JELLYFIN_TOKEN= processes/subtitles/lib/audit-coverage.py +> ``` +> +> Run after every fetch batch so the committed file stays accurate. +> > **Read [`STYLE.md`](STYLE.md) first.** Every fetch must hit the > bar set there: one English `.srt` per episode, plain (no SDH / no MT / no > AI / no Forced), best-quality release. The picker logic in v1/v2/v3 diff --git a/processes/subtitles/lib/audit-coverage.py b/processes/subtitles/lib/audit-coverage.py new file mode 100755 index 0000000..9efea78 --- /dev/null +++ b/processes/subtitles/lib/audit-coverage.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +"""ARRFLIX subtitle coverage audit — read-only. + +Queries Jellyfin live (via SSH+curl into the nullstone container), classifies +every TV episode and movie by the source of its English subtitle (sidecar / +embedded / none), and renders a Markdown report. Designed to be regenerated +on demand and committed alongside the recipe so the repo always has a +current view of what's subbed and what isn't. + +Usage: + JELLYFIN_TOKEN= \\ + processes/subtitles/lib/audit-coverage.py [--out PATH] + +Default output path: processes/subtitles/COVERAGE.md (relative to repo root). +With --stdout, prints to stdout instead of writing the file. 
# SSH target that hosts the Jellyfin container; overridable via $NULLSTONE.
NULLSTONE = os.environ.get("NULLSTONE", "user@192.168.0.100")
# Jellyfin base URL as reached by the curl that runs inside the container.
JF_BASE = "http://localhost:8096"
# This script lives three directory levels below the repository root.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
DEFAULT_OUT = os.path.join(REPO_ROOT, "processes", "subtitles", "COVERAGE.md")


def die(msg: str, code: int = 1) -> None:
    """Print ``ERROR: <msg>`` to stderr and exit the process with *code*.

    Never returns: ``sys.exit`` raises ``SystemExit``.
    """
    print(f"ERROR: {msg}", file=sys.stderr)
    sys.exit(code)


def jellyfin(path: str, params: dict | None = None) -> dict:
    """GET a Jellyfin endpoint through ``ssh`` + ``docker exec ... curl``.

    Args:
        path: API path appended to ``JF_BASE`` (for example ``"/Items"``).
        params: Optional query parameters. Commas are left unescaped so
            comma-separated values are sent literally.

    Returns:
        The decoded JSON response body.

    Exits via ``die`` when ``JELLYFIN_TOKEN`` is unset, when the ssh/curl
    pipeline fails (including HTTP errors, because curl runs with ``-f``),
    or when the response body is not valid JSON.
    """
    tok = os.environ.get("JELLYFIN_TOKEN")
    if not tok:
        die("JELLYFIN_TOKEN not set")

    url = JF_BASE + path
    if params:
        url += "?" + urllib.parse.urlencode(params, safe=",")

    # The third argv element is re-parsed by the remote login shell, so every
    # dynamic piece (token header and URL) has to be shell-quoted.
    # -f turns HTTP >= 400 into a non-zero exit status; -sS keeps curl quiet
    # but still prints the reason when it fails.
    remote = " ".join([
        "docker exec jellyfin curl -fsS -H",
        shlex.quote(f"X-Emby-Token: {tok}"),
        shlex.quote(url),
    ])
    try:
        raw = subprocess.check_output(["ssh", NULLSTONE, remote], text=True)
    except subprocess.CalledProcessError as exc:
        # str(exc) would echo the whole command line, token included, so only
        # the exit status is reported.
        die(f"Jellyfin request for {path} failed (exit status {exc.returncode})")
    except OSError as exc:
        die(f"could not run ssh: {exc}")

    try:
        return json.loads(raw)
    except json.JSONDecodeError as exc:
        die(f"Jellyfin returned non-JSON for {path}: {exc}")


def stream_summary(item: dict) -> dict:
    """Tally the subtitle streams listed in an item's ``MediaStreams``.

    Streams whose ``Type`` is not ``"Subtitle"`` are ignored. A stream counts
    as English when its lower-cased ``Language`` is ``"eng"`` or ``"en"``,
    and as a sidecar when ``IsExternal`` is truthy.

    Returns:
        A dict with the keys ``eng_sidecar``, ``eng_embed``,
        ``other_sidecar``, ``other_embed``, ``embedded_any`` (all
        non-external subtitle streams) and ``sub_total``.
    """
    tally = {"eng_sidecar": 0, "eng_embed": 0, "other_sidecar": 0,
             "other_embed": 0, "embedded_any": 0, "sub_total": 0}
    # A missing key and an explicit null are both treated as "no streams".
    for st in item.get("MediaStreams") or []:
        if st.get("Type") != "Subtitle":
            continue
        tally["sub_total"] += 1
        is_eng = (st.get("Language") or "").lower() in ("eng", "en")
        if st.get("IsExternal"):
            tally["eng_sidecar" if is_eng else "other_sidecar"] += 1
        else:
            tally["embedded_any"] += 1
            tally["eng_embed" if is_eng else "other_embed"] += 1
    return tally


def ep_status_char(s: dict) -> str:
    """Return the one-character bar glyph for a ``stream_summary`` tally.

    Precedence: ``█`` English sidecar, ``▒`` English embedded, ``▓`` any
    other embedded subtitle, ``·`` no subtitles at all. ``?`` is the
    remaining case: subtitles exist, but only as non-English sidecars.
    """
    if s["eng_sidecar"]:
        return "█"
    if s["eng_embed"]:
        return "▒"
    if s["embedded_any"]:
        return "▓"
    if s["sub_total"] == 0:
        return "·"
    return "?"
def render_show_block(name: str, eps: list[dict]) -> tuple[str, dict]:
    """Render one series' summary row plus its per-episode glyph bar.

    Args:
        name: Series title shown in the first column.
        eps: Episode items for the series. The list is not modified.

    Returns:
        ``(block, counts)``. ``block`` is the summary row, a newline, and the
        bar wrapped at 50 glyphs per line. ``counts`` has the keys
        ``eng_sc``, ``eng_emb``, ``embed_other`` and ``none``.
    """
    # sorted() leaves the caller's list alone; "or 0" also covers an explicit
    # null index, which would otherwise fail to compare against ints.
    ordered = sorted(
        eps,
        key=lambda e: (e.get("ParentIndexNumber") or 0, e.get("IndexNumber") or 0),
    )

    # Deriving the bucket from the glyph keeps the bar and the numeric
    # columns in agreement; "·" and "?" both land in "none".
    bucket_for = {"█": "eng_sc", "▒": "eng_emb", "▓": "embed_other"}
    counts = {"eng_sc": 0, "eng_emb": 0, "embed_other": 0, "none": 0}
    bar = []
    for ep in ordered:
        glyph = ep_status_char(stream_summary(ep))
        counts[bucket_for.get(glyph, "none")] += 1
        bar.append(glyph)

    n = len(ordered)
    pct = counts["eng_sc"] * 100 // n if n else 0
    if n == 0:
        # Without this guard an empty series satisfied "eng_sc == n" (0 == 0)
        # and was reported as "OK (0%)".
        status = "NO EPISODES"
    elif counts["eng_sc"] == n:
        status = f"OK ({pct}%)"
    elif counts["eng_sc"] == 0 and counts["eng_emb"] == n:
        # Only claim "no sidecars" when there really are none; a mix of
        # sidecar and embedded-only episodes falls through to PARTIAL.
        status = "OK-EMBED (no sidecars)"
    elif counts["none"] == n:
        status = "NEEDS SUBS"
    else:
        status = f"PARTIAL ({pct}%)"

    line = (f"{name:<42} {n:>4} {counts['eng_sc']:>6} "
            f"{counts['eng_emb']:>7} {counts['none']:>4} {status}")
    bar_lines = [" " + "".join(bar[i:i + 50]) for i in range(0, len(bar), 50)]
    return line + "\n" + "\n".join(bar_lines), counts


def _movie_status(sm: dict) -> str:
    """Map a movie's ``stream_summary`` tally to its Status column label."""
    if sm["eng_sidecar"]:
        return "OK (sidecar)"
    if sm["eng_embed"]:
        return "OK (embedded)"
    if sm["embedded_any"]:
        return "OTHER-LANG ONLY"
    if sm["sub_total"] == 0:
        return "NEEDS SUBS"
    # Subtitles exist, but only as non-English sidecars.
    return "?"


def main() -> int:
    """Query Jellyfin, render the coverage report, and write or print it.

    Options: ``--out PATH`` (default ``DEFAULT_OUT``) and ``--stdout``, which
    prints the report instead of writing the file.

    Returns:
        0 after the report has been emitted.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default=DEFAULT_OUT)
    ap.add_argument("--stdout", action="store_true")
    args = ap.parse_args()

    print("[audit] querying Jellyfin…", file=sys.stderr)
    series = jellyfin("/Items", {
        "IncludeItemTypes": "Series",
        "Recursive": "true",
        "Fields": "Path",
        "SortBy": "SortName",
    })["Items"]
    eps = jellyfin("/Items", {
        "IncludeItemTypes": "Episode",
        "Recursive": "true",
        "Fields": "Path,MediaStreams,SeriesName,ParentIndexNumber,IndexNumber",
    })["Items"]
    movies = jellyfin("/Items", {
        "IncludeItemTypes": "Movie",
        "Recursive": "true",
        "Fields": "Path,MediaStreams",
        "SortBy": "SortName",
    })["Items"]

    # Group episodes under their series id, falling back to the season id.
    by_series = collections.defaultdict(list)
    for e in eps:
        by_series[e.get("SeriesId") or e.get("SeasonId", "???")].append(e)

    now = _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    out = [
        "# ARRFLIX subtitle coverage",
        "",
        f"_Generated {now} by `processes/subtitles/lib/audit-coverage.py`._",
        "_Re-run: `JELLYFIN_TOKEN= processes/subtitles/lib/audit-coverage.py`._",
        "",
        "Legend: `█` eng sidecar · `▒` eng embedded only · "
        "`▓` other-lang embedded · `·` none",
        "",
        "## TV shows",
        "",
        "```",
        f"{'Show':<42} {'Eps':>4} {'sc':>6} {'emb':>7} {'none':>4} Status",
        "─" * 78,
    ]

    agg = {"eng_sc": 0, "eng_emb": 0, "embed_other": 0, "none": 0, "total": 0}
    for s in sorted(series, key=lambda x: x["Name"].lower()):
        block, counts = render_show_block(s["Name"], by_series.get(s["Id"], []))
        out.append(block)
        out.append("")
        for key, value in counts.items():
            agg[key] += value
        agg["total"] += sum(counts.values())
    out.append("```")
    out.append("")

    out.append("## Movies")
    out.append("")
    out.append("```")
    out.append(f"{'Title':<58} {'sc':>6} {'emb':>7} Status")
    out.append("─" * 78)
    m_eng = 0
    for m in sorted(movies, key=lambda x: x["Name"].lower()):
        sm = stream_summary(m)
        status = _movie_status(sm)
        if sm["eng_sidecar"] or sm["eng_embed"]:
            m_eng += 1
        name = m["Name"]
        if len(name) > 56:
            name = name[:55] + "…"
        out.append(f"{name:<58} {sm['eng_sidecar']:>6} {sm['eng_embed']:>7} {status}")
    out.append("```")
    out.append("")

    out.append("## Aggregate")
    out.append("")
    n = agg["total"] or 1  # avoid dividing by zero on an empty library
    out.append("| Metric | Count | % |")
    out.append("|---|---:|---:|")
    out.append(f"| Episodes total | {agg['total']} | — |")
    out.append(f"| eng sidecar | {agg['eng_sc']} | {agg['eng_sc']*100//n}% |")
    out.append(f"| eng embedded only | {agg['eng_emb']} | {agg['eng_emb']*100//n}% |")
    out.append(f"| other-lang embedded only | {agg['embed_other']} | {agg['embed_other']*100//n}% |")
    out.append(f"| no subs anywhere | {agg['none']} | {agg['none']*100//n}% |")
    out.append(f"| Movies total | {len(movies)} | — |")
    out.append(f"| Movies with any eng sub | {m_eng} | "
               f"{m_eng*100//max(len(movies),1)}% |")
    out.append("")
    out.append("## Status meanings")
    out.append("")
    out.append("- **OK** — every episode has an external `.eng.srt` sidecar (STYLE.md happy path)")
    out.append("- **OK-EMBED** — all eps playable in English but no sidecars; `SaveSubtitlesWithMedia` won't trigger fetch since Jellyfin sees an eng track already")
    out.append("- **PARTIAL (X %)** — some sidecars, some gaps")
    out.append("- **NEEDS SUBS** — zero subs of any language; v3 / v3.5 / v4 fetch required")
    out.append("- **NO EPISODES** — the series has no episodes in the library")
    out.append("- **OTHER-LANG ONLY** (movies) — embedded subs exist but none in English")

    rendered = "\n".join(out) + "\n"
    if args.stdout:
        sys.stdout.write(rendered)
    else:
        # The report contains box-drawing glyphs, so do not depend on the
        # locale's default encoding when writing it.
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(rendered)
        print(f"[audit] wrote {args.out}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())
+246,13 @@ def main() -> int: print(f"\n[done] ok={ok}/{len(work)} failures={len(fail)}", file=sys.stderr) for lab, why in fail: print(f" - {lab}: {why}", file=sys.stderr) + if ok: + try: + subprocess.run([os.path.join(os.path.dirname(__file__), + "audit-coverage.py")], + check=False) + except Exception as e: + print(f"[warn] coverage refresh skipped: {e}", file=sys.stderr) return 0 if ok else 2 diff --git a/processes/subtitles/lib/sub-rest-fetch.py b/processes/subtitles/lib/sub-rest-fetch.py index 846a18a..032b426 100755 --- a/processes/subtitles/lib/sub-rest-fetch.py +++ b/processes/subtitles/lib/sub-rest-fetch.py @@ -278,6 +278,13 @@ def main() -> int: print(f"\n[done] ok={ok}/{len(work)} failures={len(fail)}", file=sys.stderr) for lab, why in fail: print(f" - {lab}: {why}", file=sys.stderr) + if ok: + try: + subprocess.run([os.path.join(os.path.dirname(__file__), + "audit-coverage.py")], + check=False) + except Exception as e: + print(f"[warn] coverage refresh skipped: {e}", file=sys.stderr) return 0 if ok else 2