#!/usr/bin/env python3
"""Generate the action-first weekly AI-visibility report from snapshots."""
import collections
import datetime
import glob
import html
import json
import os

ROOT = os.path.dirname(os.path.abspath(__file__))


def _load_json(path):
    """Parse the JSON file at ``path`` and close the handle straight away.

    The file is opened in binary mode so that ``json.load`` does its own
    UTF-8/16/32 detection instead of depending on the platform locale encoding.
    """
    with open(path, "rb") as fh:
        return json.load(fh)


# Snapshot paths are sorted as strings; the last one is treated as the current
# run and the one before it (when there is one) as the previous run.
snaps = sorted(glob.glob(os.path.join(ROOT, "snapshots", "*.json")))
if not snaps:
    # Fail with a readable message rather than an IndexError on snaps[-1].
    raise SystemExit(f"no snapshot files found in {os.path.join(ROOT, 'snapshots')}")
cur = _load_json(snaps[-1])
prev = _load_json(snaps[-2]) if len(snaps) > 1 else None
# Only rows whose "ok" field is truthy take part in the report.
rows = [r for r in cur["rows"] if r["ok"]]

def share(rs):
    """Summarise how many rows in ``rs`` have a truthy ``"gig"`` field.

    Returns a 3-tuple ``(cited, total, percent)`` where ``cited`` is the number
    of rows with a truthy ``"gig"`` value, ``total`` is ``len(rs)`` and
    ``percent`` is ``cited / total * 100`` (``0`` when ``rs`` is empty).
    """
    total = len(rs)
    cited = len([row for row in rs if row["gig"]])
    if total:
        percent = cited / total * 100
    else:
        percent = 0
    return (cited, total, percent)

def by(key):
    """Group the module-level ``rows`` by ``row[key]`` and summarise each group.

    Returns a dict mapping every distinct value of ``key`` (in first-seen
    order) to the tuple produced by ``share`` for the rows carrying that value.
    """
    grouped = {}
    for row in rows:
        grouped.setdefault(row[key], []).append(row)
    return {label: share(members) for label, members in grouped.items()}

# Headline share over every usable row, then the same summary per dimension.
overall = share(rows)
phases = by("phase")
engines = by("engine")
lobs = by("lob")

# Prompt-level roll-up: one record per prompt id, accumulated over every usable
# row for that prompt (presumably one row per engine — confirm against run.py).
pr = collections.defaultdict(
    lambda: {"gig": 0, "n": 0, "comp": set(), "q": "", "phase": "", "lob": ""}
)
for row in rows:
    rec = pr[row["id"]]
    rec["n"] += 1
    if row["gig"]:
        rec["gig"] += 1
    rec["comp"].update(row["competitors"])
    # Descriptive fields are simply overwritten, so the last row seen for an id wins.
    rec["q"] = row["q"]
    rec["phase"] = row["phase"]
    rec["lob"] = row["lob"]

# Competitor leaderboard: total mentions of each name across all rows' "competitors" lists.
comp = collections.Counter(name for row in rows for name in row["competitors"])

# ACTIONS: prompts in the Category, Attribute or Utility phase where GIG has at
# most one citation and at least one competitor was named.
# Each entry is (question, lob, phase, up to 4 competitor names sorted A-Z, gig count, row count).
actions = [
    (rec["q"], rec["lob"], rec["phase"], sorted(rec["comp"])[:4], rec["gig"], rec["n"])
    for rec in pr.values()
    if rec["phase"] in ("Category", "Attribute", "Utility") and rec["gig"] <= 1 and rec["comp"]
]
# Fewest GIG citations first; within a tie, Category prompts come before the rest.
actions.sort(key=lambda item: (item[4], item[2] != "Category"))

# Order in which funnel phases are listed in the report table and console summary.
PHASE_ORDER = [
    "Category",
    "Attribute",
    "Competitive",
    "Trust",
    "Utility",
]

# Short description of each phase, shown in the "What it is" column.
PHASE_NOTE = {
    "Category": "non-branded category search — upstream of the funnel, where new buyers start",
    "Attribute": "feature/product questions",
    "Competitive": "head-to-head comparisons",
    "Trust": "brand & reputation",
    "Utility": "govt-service how-tos",
}

def delta(cur_share):
    """Return the week-over-week delta label for ``cur_share``.

    Placeholder: an empty string is returned in every case. When a previous
    snapshot is loaded its usable rows are collected, but nothing is compared
    against them yet.

    NOTE(review): ``cur_share`` is never read and no caller of ``delta`` is
    visible in this part of the file.
    """
    if prev:
        # Gathered for a future comparison; not used for now.
        prior_rows = [row for row in prev["rows"] if row["ok"]]
    return ""  # baseline week

# Scalars interpolated into the report header.
date = cur["date"]
# Percentage component of the Category summary; 0 when the snapshot has no Category rows.
cat_pct = phases.get("Category", (0, 0, 0))[2]
if cat_pct < overall[2]:
    story_phrase = "strong where buyers already know the brand and weak where they do not"
else:
    story_phrase = "building category presence"
baseline_line = "vs last week." if prev else "Baseline week — deltas start next run."
# The better of the Trust and Utility percentages feeds the third KPI tile.
trust_pct = phases.get("Trust", (0, 0, 0))[2]
utility_pct = phases.get("Utility", (0, 0, 0))[2]
brandwin = max(trust_pct, utility_pct)
# H collects the HTML fragments that are joined into the final page.
H = []
# Opening chunk of the page: document head with the inline stylesheet, the
# title block, the three KPI tiles and two fixed narrative paragraphs.
# This is an f-string, so literal CSS braces are doubled ({{ }}); single braces
# interpolate date, len(rows), len(pr), baseline_line, overall, cat_pct and brandwin.
# NOTE(review): story_phrase is computed above but is not interpolated anywhere
# in this text — the "story in one line" paragraph is fixed copy. Confirm intent.
H.append(f"""<!DOCTYPE html><html lang=en><head><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1">
<title>GIG AI-Visibility — Weekly Report {date}</title><style>
:root{{color-scheme:light;--ink:#0A0A0A;--bg:#FAF7F2;--red:#C0392B;--blue:#2E2E85;--green:#1F6B3F;--gold:#9a5414;--bord:#E5E0D5;--mut:#6b6459}}
*{{box-sizing:border-box}}body{{margin:0;background:var(--bg);color:var(--ink);font-family:Charter,Georgia,serif;line-height:1.5}}
.wrap{{max-width:880px;margin:0 auto;padding:30px 24px 70px}}
.eyebrow{{font-family:'SF Mono',monospace;font-size:11px;letter-spacing:.14em;text-transform:uppercase;color:var(--mut)}}
h1{{font-size:27px;margin:6px 0 2px}}h2{{font-size:16px;margin:26px 0 6px;font-family:'Source Sans Pro',sans-serif;text-transform:uppercase;letter-spacing:.05em;color:var(--blue);border-top:2px solid var(--bord);padding-top:14px}}
p,li,td,th{{font-size:14.5px}}.sub{{color:var(--mut);font-size:13.5px;margin:0 0 8px}}
.big{{display:flex;gap:14px;flex-wrap:wrap;margin:12px 0}}
.kpi{{background:#fff;border:1px solid var(--bord);border-radius:9px;padding:13px 16px;min-width:150px}}
.kpi .v{{font-size:30px;font-weight:800;font-family:'Source Sans Pro',sans-serif}}
.kpi .l{{font-size:12px;color:var(--mut);font-family:'Source Sans Pro',sans-serif}}
table{{border-collapse:collapse;width:100%;margin:8px 0;font-family:'Source Sans Pro',sans-serif;font-size:13.5px}}
th,td{{border:1px solid var(--bord);padding:6px 9px;text-align:left}}th{{background:#f0ebe1}}
td.c,th.c{{text-align:center}}
.bar{{height:11px;border-radius:3px;background:var(--green);display:inline-block;vertical-align:middle}}
.lo .bar{{background:var(--red)}}.mid .bar{{background:var(--gold)}}
.act{{background:#fff6f5;border:1px solid #f1c9c4;border-left:4px solid var(--red);border-radius:7px;padding:8px 14px;margin:8px 0}}
.foot{{margin-top:30px;padding-top:14px;border-top:2px solid var(--bord);font-size:12.5px;color:var(--mut);font-family:'Source Sans Pro',sans-serif}}
strong{{font-weight:700}}</style></head><body><div class=wrap>
<div class=eyebrow>GIG Gulf · AI-Visibility · Weekly Report</div>
<h1>Where GIG shows up when buyers ask AI</h1>
<p class=sub>Week of {date} · {len(rows)} answers across ChatGPT, Claude &amp; Google · {len(pr)} buyer prompts · UAE. {baseline_line}</p>
<div class=big>
<div class=kpi><div class=v>{overall[2]:.0f}%</div><div class=l>Overall AI citation share<br>({overall[0]}/{overall[1]} answers)</div></div>
<div class=kpi><div class=v style="color:var(--red)">{cat_pct:.0f}%</div><div class=l>Category (non-brand)<br>where new buyers start</div></div>
<div class=kpi><div class=v style="color:var(--green)">{brandwin:.0f}%</div><div class=l>Brand / Trust / Utility<br>where GIG already wins</div></div>
</div>
<p><strong>The story in one line:</strong> GIG is <strong>highly visible</strong> in UAE AI answers — but a large share of that visibility rides on <strong>AXA legacy equity</strong> (GIG Gulf is the rebranded AXA Gulf, and AXA queries deliver up to ~25% of GWP). Counting AXA as GIG-owned, citation share is {overall[2]:.0f}% overall and {cat_pct:.0f}% on non-branded category questions.</p>
<div class=act style="border-left-color:var(--green);background:#f0f8f2;border-color:#cfe8d6">
<strong>AXA legacy = owned demand, not a leak.</strong> AXA appears in nearly every category answer. That is GIG harvesting AXA-searcher intent — protect and feed it, don't scrub it. Note: an external GEO audit that counts only the literal string "GIG Gulf" will <em>understate</em> true visibility and report a category gap that the AXA equity is already filling.</div>
""")

# by phase
# One table row per funnel phase, in PHASE_ORDER; phases with no rows in this
# snapshot are skipped.
H.append("<h2>By funnel phase — where we win and lose</h2><table><tr><th>Phase</th><th class=c>GIG cited</th><th>Visibility</th><th>What it is</th></tr>")
for ph in PHASE_ORDER:
    if ph not in phases:
        continue
    g, n, pct = phases[ph]
    # Row colouring: below 25% is "lo" (red bar), below 60% is "mid" (gold bar),
    # anything else keeps the default green bar.
    cls = "lo" if pct < 25 else ("mid" if pct < 60 else "")
    # Emit the class attribute only when there is a class: an unquoted empty
    # value ("<tr class=>") is not valid HTML.
    tr_open = f"<tr class={cls}>" if cls else "<tr>"
    # The bar is 1.6px wide per percentage point.
    H.append(f"{tr_open}<td><strong>{ph}</strong></td><td class=c>{g}/{n} = {pct:.0f}%</td><td><span class=bar style='width:{pct*1.6:.0f}px'></span></td><td>{PHASE_NOTE[ph]}</td></tr>")
H.append("</table>")

# by engine
# Engine and brand names come from snapshot data, so they are HTML-escaped
# before being written into the page ("&", "<" and ">" would otherwise be
# interpreted as markup).
H.append("<h2>By engine</h2><table><tr><th>Engine</th><th class=c>GIG citation share</th></tr>")
for engine, (g, n, pct) in sorted(engines.items()):
    H.append(f"<tr><td>{html.escape(str(engine), quote=False)}</td><td class=c>{g}/{n} = {pct:.0f}%</td></tr>")
H.append("</table>")

# competitor leaderboard
# Top 8 names by mention count; the header shows the number of usable answers.
H.append("<h2>Who takes the answer instead — competitor mentions</h2><table><tr><th>Brand</th><th class=c>Appearances (of "+str(len(rows))+" answers)</th></tr>")
for brand, cnt in comp.most_common(8):
    H.append(f"<tr><td>{html.escape(str(brand), quote=False)}</td><td class=c>{cnt}</td></tr>")
H.append("</table>")

# ACTIONS
H.append("<h2>Do these this week — non-brand prompts GIG is losing</h2>")
# The action list is built from prompts with at most one GIG citation, and each
# card prints the real g/n count, so the blurb says "at most one" rather than "zero".
H.append("<p class=sub>Category &amp; attribute questions where GIG was cited by <strong>at most one</strong> engine but competitors were. Each is a page to build or fix.</p>")
if not actions:
    H.append("<p>No weak category/attribute prompts this run.</p>")
# Only the first six entries of the (already sorted) action list are shown.
for q, lob, ph, cs, g, n in actions[:6]:
    # Prompt text and brand names come from snapshot data; escape them so
    # characters such as "&" or "<" cannot break the markup.
    safe_q = html.escape(str(q), quote=False)
    safe_lob = html.escape(str(lob), quote=False)
    safe_ph = html.escape(str(ph), quote=False)
    rivals = ", ".join(html.escape(str(name), quote=False) for name in cs)
    H.append(f"<div class=act><strong>{safe_q}</strong><br><span class=sub>{safe_lob} · {safe_ph} · GIG cited by {g}/{n} engines · taking the answer instead: {rivals}</span></div>")

# GA4 captured AI traffic + "how much of organic is AI" estimate (reads ga4-aitraffic.json)
# The section is optional. It is assembled in a scratch list and only added to
# the page once every piece has rendered, so a missing file, a missing key or a
# zero denominator can no longer leave a dangling heading or an unclosed <table>.
try:
    # Binary mode lets json.load detect the encoding itself; the with-block closes the handle.
    with open(os.path.join(ROOT, "ga4-aitraffic.json"), "rb") as fh:
        ga = json.load(fh)
    ch = ga["channels"]; est = ga["estimate"]; tot = ch["total_sessions"]; org = ch["organic_search"]
    aiT = ga["ai_sessions_total"]; lo = est["ai_mediated_organic_low"]; hi = est["ai_mediated_organic_high"]
    ga_html = []
    ga_html.append("<h2>Captured AI traffic vs what we measure (GA4)</h2>")
    ga_html.append(f"<p class=sub>GA4 'Generative AI Sessions', {ga['period'].replace('..','&ndash;')} &mdash; <strong>{aiT:,}</strong> AI-referred sessions ({ga['ai_engagement_rate']*100:.0f}% engaged). GA4 only sees the clicks.</p>")
    ga_html.append("<table><tr><th>AI source</th><th class=c>Sessions</th><th class=c>Share</th></tr>")
    for s in ga["ai_by_source"]:
        # Source labels come from the data file, so escape them for HTML.
        ga_html.append(f"<tr><td>{html.escape(str(s['source']), quote=False)}</td><td class=c>{s['sessions']:,}</td><td class=c>{s['share']*100:.1f}%</td></tr>")
    ga_html.append(f"<tr><td><strong>Total</strong></td><td class=c><strong>{aiT:,}</strong></td><td class=c>100%</td></tr></table>")
    # NOTE(review): est['assumptions'] is inserted verbatim — left unescaped in
    # case it intentionally carries markup; confirm against the file's producer.
    ga_html.append(f"<div class=act style='border-left-color:#2E2E85;background:#eef0f8;border-color:#cfd3ec'><strong>How much of our organic traffic is AI &mdash; best guess.</strong> Measured AI referrals: <strong>{aiT:,}</strong> ({aiT/tot*100:.1f}% of all {tot:,} sessions; {aiT/org*100:.0f}% the size of the {org:,} Organic-Search channel). Add an estimated <strong>{lo:,}&ndash;{hi:,}</strong> AI-Overview-mediated organic sessions and AI drives the equivalent of roughly <strong>{(aiT+lo)/org*100:.0f}&ndash;{(aiT+hi)/org*100:.0f}% of organic</strong> today &mdash; counting only clicks, before the zero-click majority. <span style='opacity:.75'>{est['assumptions']}</span></div>")
except Exception as e:
    # Deliberate best-effort: the report is still produced without this section.
    # Escaping the message keeps a stray ">" from closing the HTML comment early.
    H.append(f"<!-- ga4 section skipped: {html.escape(str(e), quote=False)} -->")
else:
    H.extend(ga_html)

# Closing chunk of the page: the "how to read this" note, the footer with the
# re-run command, and the tags that close the wrapper div, body and document.
# The pieces are joined with newlines and contain no interpolated values.
H.append("\n".join([
    '<h2>How to read this vs the agency pitch</h2>',
    '<p>This is the same measurement an external GEO agency charges $3.5k–9.5k/month for — run in-house, on your own prompt set, and built to end with <strong>actions</strong>, not just a score. Next week it shows movement: which prompts we won back, which competitors moved.</p>',
    '<div class=foot>GIG Gulf · AI-Visibility tool (Articulate) · prompts &amp; snapshots in <span style="font-family:monospace">AIVisibility/</span> · re-run weekly: <span style="font-family:monospace">python3 run.py &amp;&amp; python3 report.py</span></div>',
    '</div></body></html>',
]))

# Write the page as UTF-8 explicitly: the document declares <meta charset=utf-8>
# and contains non-ASCII punctuation, so the platform default encoding must not
# decide the bytes on disk. The with-block also guarantees the file is flushed
# and closed.
with open(os.path.join(ROOT, "weekly-report.html"), "w", encoding="utf-8") as out:
    out.write("".join(H))
print("wrote weekly-report.html")
# Console summary: headline numbers, then one line per phase present in the data.
print(f"overall {overall[2]:.0f}% | Category {phases.get('Category',(0,0,0))[2]:.0f}% | actions {len(actions)}")
for ph in PHASE_ORDER:
    if ph in phases:
        print(f"  {ph}: {phases[ph][2]:.0f}%")
