legacy-arrflix/playbooks/subtitles/lib/audit-coverage.py
s8n 24a9497e7d playbooks/ rename + import-media v1.0 + lilo&stitch run
processes/ -> playbooks/ (git mv preserves history; updated cross-refs
in ROADMAP, README, subtitles playbook + scripts).

playbooks/import-media/README.md v1.0 — 7-step import workflow:
  stage on onyx -> rsync to nullstone -> chmod -> verify scan ->
  Items/Counts bump -> optional subtitle pass -> run-log
Cross-references docs/05/07/08, ADMIN-GUIDE, README. Mirrors the
existing subtitles playbook structure (CHANGELOG + runs/_template).

CHANGELOG v1.0 lists known gaps (bin/cleanup-import.sh and
bin/normalize.py still doc-only, ROADMAP M6).

First run logged: playbooks/import-media/runs/lilo-stitch-2002.md.
Lilo & Stitch (2002) imported to /home/user/media/movies/, item
c2f4aff133c1b9631500fadf293b0b2f, TMDb 11544, MovieCount 3 -> 4.
LibraryMonitor didn't auto-fire — needed manual /Library/Refresh;
playbook updated to make this an unconditional step.

Source: 1080p BluRay HEVC 10-bit / EAC3 5.1 / 2x PGS embedded subs.
Per quality bar (README.md:41) — passes.
2026-05-10 02:29:57 +01:00

240 lines
8.6 KiB
Python
Executable file

#!/usr/bin/env python3
"""ARRFLIX subtitle coverage audit — read-only.

Queries Jellyfin live (via SSH+curl into the nullstone container), classifies
every TV episode and movie by the source of its English subtitle (sidecar /
embedded / none), and renders a Markdown report. Designed to be regenerated
on demand and committed alongside the recipe so the repo always has a
current view of what's subbed and what isn't.

Usage:
    JELLYFIN_TOKEN=<admin-token> \\
        playbooks/subtitles/lib/audit-coverage.py [--out PATH] [--stdout]

Default output path: playbooks/subtitles/COVERAGE.md (relative to repo root).
With --stdout, prints to stdout instead of writing the file.

Env (required):
    JELLYFIN_TOKEN   X-Emby-Token for nullstone Jellyfin

Env (optional):
    NULLSTONE        SSH target, default user@192.168.0.100

Classification (per episode):
    █  eng sidecar          STYLE.md happy path
    ▒  eng embedded only    playable but doesn't satisfy "1 .eng.srt per ep"
    ▓  other-lang embedded  no English at all, only foreign subs muxed
    ·  none                 nothing — fetch needed
    ?  other-lang sidecar   only non-English external subs, nothing embedded
"""
from __future__ import annotations
import argparse
import collections
import datetime as _dt
import json
import os
import shlex
import subprocess
import sys
import urllib.parse
# SSH target for the host running the Jellyfin container; override with $NULLSTONE.
NULLSTONE = os.environ.get("NULLSTONE", "user@192.168.0.100")
# Jellyfin base URL as seen from inside the container (curl runs via `docker exec`).
JF_BASE = "http://localhost:8096"
# This script sits three directories below the repo root (playbooks/subtitles/lib/).
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
# The tree was renamed processes/ -> playbooks/; the report lives beside the
# subtitles playbook, matching the path documented in the module docstring.
DEFAULT_OUT = os.path.join(REPO_ROOT, "playbooks", "subtitles", "COVERAGE.md")
def die(msg: str, code: int = 1) -> None:
    """Report a fatal error on stderr and terminate the process.

    Args:
        msg: Human-readable description; emitted as ``ERROR: <msg>``.
        code: Process exit status (default 1).

    Raises:
        SystemExit: always, carrying *code*.
    """
    sys.stderr.write(f"ERROR: {msg}\n")
    raise SystemExit(code)
def jellyfin(path: str, params: dict | None = None) -> dict:
    """GET a Jellyfin API endpoint and return the decoded JSON body.

    The request is not issued from this machine: ``curl`` is run inside the
    ``jellyfin`` container on ``NULLSTONE`` via ``ssh`` + ``docker exec``.

    Args:
        path: API path appended to ``JF_BASE``, e.g. ``"/Items"``.
        params: Optional query parameters. Commas are left unescaped so
            comma-separated values (``Fields=Path,MediaStreams``) pass
            through verbatim.

    Returns:
        The parsed JSON response.

    Raises:
        SystemExit: via ``die`` when ``JELLYFIN_TOKEN`` is unset or empty.
        subprocess.CalledProcessError: when the ssh command exits non-zero.
        json.JSONDecodeError: when the response body is not valid JSON.
    """
    tok = os.environ.get("JELLYFIN_TOKEN")
    if not tok:
        die("JELLYFIN_TOKEN not set")
    qs = "?" + urllib.parse.urlencode(params, safe=",") if params else ""
    url = JF_BASE + path + qs
    # ssh hands the remote side one string that a shell re-parses, so every
    # word is quoted — including the header, whose token value must not be
    # able to terminate the quoting or inject extra shell syntax.
    remote_argv = ["docker", "exec", "jellyfin", "curl", "-s",
                   "-H", f"X-Emby-Token: {tok}", url]
    remote_cmd = " ".join(shlex.quote(word) for word in remote_argv)
    return json.loads(subprocess.check_output(["ssh", NULLSTONE, remote_cmd], text=True))
def stream_summary(item: dict) -> dict:
    """Tally an item's subtitle streams by language and by delivery.

    Only entries of ``item["MediaStreams"]`` whose ``Type`` is ``"Subtitle"``
    are counted. A stream is English when its lower-cased ``Language`` is
    ``"eng"`` or ``"en"``; it is a sidecar when ``IsExternal`` is truthy and
    embedded otherwise.

    Returns:
        A dict with integer counters ``eng_sidecar``, ``eng_embed``,
        ``other_sidecar``, ``other_embed``, ``embedded_any`` (all embedded
        subtitle streams) and ``sub_total`` (all subtitle streams).
    """
    tally = dict.fromkeys(
        ("eng_sidecar", "eng_embed", "other_sidecar",
         "other_embed", "embedded_any", "sub_total"),
        0,
    )
    # A missing key and an explicit null both mean "no streams".
    streams = item.get("MediaStreams") or []
    for stream in (s for s in streams if s.get("Type") == "Subtitle"):
        tally["sub_total"] += 1
        external = bool(stream.get("IsExternal"))
        english = (stream.get("Language") or "").lower() in ("eng", "en")
        if not external:
            tally["embedded_any"] += 1
        language_part = "eng" if english else "other"
        delivery_part = "sidecar" if external else "embed"
        tally[f"{language_part}_{delivery_part}"] += 1
    return tally
def ep_status_char(s: dict) -> str:
    """Map a subtitle tally (as built by ``stream_summary``) to a bar glyph.

    The glyphs follow the legend printed in the report and listed in the
    module docstring; the first matching rule wins:

    * ``█`` — at least one English sidecar
    * ``▒`` — English only as an embedded track
    * ``▓`` — embedded subtitles exist, none English
    * ``·`` — no subtitle streams at all
    * ``?`` — anything else (only non-English external subtitles)

    Args:
        s: Dict with the counters ``eng_sidecar``, ``eng_embed``,
            ``embedded_any`` and ``sub_total``.

    Returns:
        A single-character string, so each episode takes one bar column.
    """
    if s["eng_sidecar"]:
        return "█"
    if s["eng_embed"]:
        return "▒"
    if s["embedded_any"]:
        return "▓"
    if s["sub_total"] == 0:
        return "·"
    return "?"
def render_show_block(name: str, eps: list[dict]) -> tuple[str, dict]:
    """Render one show's summary row plus its per-episode coverage bar.

    Episodes are ordered by (season, episode) number and each is classified
    once: English sidecar, English embedded only, other-language embedded
    only, or none. The bar is wrapped at 50 glyphs per line.

    Args:
        name: Show title for the left-hand column.
        eps: Episode items for the show; the list is not modified.

    Returns:
        ``(text, counts)`` where ``text`` is the summary row followed by the
        bar lines, and ``counts`` has the keys ``eng_sc``, ``eng_emb``,
        ``embed_other`` and ``none``.
    """
    # sorted() keeps the caller's list untouched; `or 0` also covers indices
    # that are present but null, which would otherwise break tuple comparison.
    ordered = sorted(
        eps,
        key=lambda e: (e.get("ParentIndexNumber") or 0, e.get("IndexNumber") or 0),
    )
    counts = {"eng_sc": 0, "eng_emb": 0, "embed_other": 0, "none": 0}
    bar = []
    for e in ordered:
        sm = stream_summary(e)
        if sm["eng_sidecar"]:
            counts["eng_sc"] += 1
        elif sm["eng_embed"]:
            counts["eng_emb"] += 1
        elif sm["embedded_any"]:
            counts["embed_other"] += 1
        else:
            counts["none"] += 1
        bar.append(ep_status_char(sm))
    n = len(ordered)
    pct = counts["eng_sc"] * 100 // n if n else 0
    if n == 0:
        # Without this guard 0 == 0 below would report an empty show as "OK".
        status = "NO EPISODES"
    elif counts["eng_sc"] == n:
        status = f"OK ({pct}%)"
    elif counts["eng_sc"] + counts["eng_emb"] == n:
        status = "OK-EMBED (no sidecars)"
    elif counts["none"] == n:
        status = "NEEDS SUBS"
    else:
        status = f"PARTIAL ({pct}%)"
    line = (f"{name:<42} {n:>4} {counts['eng_sc']:>6} "
            f"{counts['eng_emb']:>7} {counts['none']:>4} {status}")
    bar_lines = [" " + "".join(bar[i:i + 50]) for i in range(0, len(bar), 50)]
    return line + "\n" + "\n".join(bar_lines), counts
def main() -> int:
    """Query Jellyfin, build the Markdown coverage report, and emit it.

    Fetches all series, episodes and movies, groups episodes by series, and
    renders a TV table, a movie table, aggregate percentages and a legend.
    The report goes to ``--out`` (default ``DEFAULT_OUT``) or, with
    ``--stdout``, to standard output; progress messages go to stderr.

    Returns:
        0 on success, for use as the process exit status.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default=DEFAULT_OUT)
    ap.add_argument("--stdout", action="store_true")
    args = ap.parse_args()

    print("[audit] querying Jellyfin…", file=sys.stderr)
    series = jellyfin("/Items", {
        "IncludeItemTypes": "Series",
        "Recursive": "true",
        "Fields": "Path",
        "SortBy": "SortName",
    })["Items"]
    eps = jellyfin("/Items", {
        "IncludeItemTypes": "Episode",
        "Recursive": "true",
        "Fields": "Path,MediaStreams,SeriesName,ParentIndexNumber,IndexNumber",
    })["Items"]
    movies = jellyfin("/Items", {
        "IncludeItemTypes": "Movie",
        "Recursive": "true",
        "Fields": "Path,MediaStreams",
        "SortBy": "SortName",
    })["Items"]

    # Group episodes under their series id (falling back to the season id).
    by_series = collections.defaultdict(list)
    for e in eps:
        by_series[e.get("SeriesId") or e.get("SeasonId", "???")].append(e)

    now = _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    # Horizontal rule under each table header, inside the fenced block.
    rule = "─" * 78

    out = []
    out.append("# ARRFLIX subtitle coverage")
    out.append("")
    out.append(f"_Generated {now} by `playbooks/subtitles/lib/audit-coverage.py`._")
    out.append("_Re-run: `JELLYFIN_TOKEN=<admin-token> playbooks/subtitles/lib/audit-coverage.py`._")
    out.append("")
    out.append("Legend: `█` eng sidecar · `▒` eng embedded only · "
               "`▓` other-lang embedded · `·` none")
    out.append("")

    # --- TV shows ---------------------------------------------------------
    out.append("## TV shows")
    out.append("")
    out.append("```")
    out.append(f"{'Show':<42} {'Eps':>4} {'sc':>6} {'emb':>7} {'none':>4} Status")
    out.append(rule)
    agg = {"eng_sc": 0, "eng_emb": 0, "embed_other": 0, "none": 0, "total": 0}
    for s in sorted(series, key=lambda x: x["Name"].lower()):
        sid = s["Id"]
        block, counts = render_show_block(s["Name"], by_series.get(sid, []))
        out.append(block)
        out.append("")
        for k in agg:
            if k == "total":
                continue
            agg[k] += counts[k]
        # The four per-show categories are disjoint, so their sum is the
        # show's episode count.
        agg["total"] += sum(counts.values())
    out.append("```")
    out.append("")

    # --- Movies -----------------------------------------------------------
    out.append("## Movies")
    out.append("")
    out.append("```")
    out.append(f"{'Title':<58} {'sc':>6} {'emb':>7} Status")
    out.append(rule)
    m_eng = 0
    for m in sorted(movies, key=lambda x: x["Name"].lower()):
        sm = stream_summary(m)
        if sm["eng_sidecar"]:
            status = "OK (sidecar)"
        elif sm["eng_embed"]:
            status = "OK (embedded)"
        elif sm["embedded_any"]:
            status = "OTHER-LANG ONLY"
        elif sm["sub_total"] == 0:
            status = "NEEDS SUBS"
        else:
            status = "?"
        if sm["eng_sidecar"] or sm["eng_embed"]:
            m_eng += 1
        name = m["Name"]
        if len(name) > 56:
            # 55 characters plus a visible ellipsis keeps the title at 56
            # columns and signals that it was cut.
            name = name[:55] + "…"
        out.append(f"{name:<58} {sm['eng_sidecar']:>6} {sm['eng_embed']:>7} {status}")
    out.append("```")
    out.append("")

    # --- Aggregate --------------------------------------------------------
    out.append("## Aggregate")
    out.append("")
    n = agg["total"] or 1  # avoid dividing by zero on an empty library
    out.append("| Metric | Count | % |")
    out.append("|---|---:|---:|")
    out.append(f"| Episodes total | {agg['total']} | — |")
    out.append(f"| eng sidecar | {agg['eng_sc']} | {agg['eng_sc']*100//n}% |")
    out.append(f"| eng embedded only | {agg['eng_emb']} | {agg['eng_emb']*100//n}% |")
    out.append(f"| other-lang embedded only | {agg['embed_other']} | {agg['embed_other']*100//n}% |")
    out.append(f"| no subs anywhere | {agg['none']} | {agg['none']*100//n}% |")
    out.append(f"| Movies total | {len(movies)} | — |")
    out.append(f"| Movies with any eng sub | {m_eng} | "
               f"{m_eng*100//max(len(movies),1)}% |")
    out.append("")

    # --- Status legend ----------------------------------------------------
    out.append("## Status meanings")
    out.append("")
    out.append("- **OK** — every episode has an external `.eng.srt` sidecar (STYLE.md happy path)")
    out.append("- **OK-EMBED** — all eps playable in English but no sidecars; `SaveSubtitlesWithMedia` won't trigger fetch since Jellyfin sees an eng track already")
    out.append("- **PARTIAL (X %)** — some sidecars, some gaps")
    out.append("- **NEEDS SUBS** — zero subs of any language; v3 / v3.5 / v4 fetch required")
    out.append("- **OTHER-LANG ONLY** (movies) — embedded subs exist but none in English")

    rendered = "\n".join(out) + "\n"
    if args.stdout:
        sys.stdout.write(rendered)
    else:
        # The report contains non-ASCII glyphs; pin the encoding so the write
        # does not depend on the locale.
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(rendered)
        print(f"[audit] wrote {args.out}", file=sys.stderr)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())