#!/usr/bin/env python3
"""
SEO Ranking Monitor for thalx.io

Checks Google search rankings for target keywords using googlesearch-python.
Updates data.json with the results, then sends a summary via Telegram and
email (AgentMail). Designed for weekly cron execution.

Usage:
    source ~/agents-claude-env/bin/activate
    python3 ~/playgrounds/seo-monitor-thalx/check-rankings.py

Cron (weekly Sunday 6:37am):
    37 6 * * 0 /home/clawd/agents-claude-env/bin/python3 /home/clawd/playgrounds/seo-monitor-thalx/check-rankings.py >> /tmp/seo-thalx.log 2>&1
"""

import json
import os
import sys
import time
import urllib.request
import urllib.parse
from datetime import datetime, timedelta, timezone
from pathlib import Path

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

# Domain whose presence in the search results counts as "ranked".
DOMAIN = "thalx.io"
# Persisted monitoring state; lives next to this script.
DATA_FILE = Path(__file__).parent / "data.json"
MAX_RESULTS = 50          # search depth per keyword
PAUSE_BETWEEN = 8.0       # seconds between queries to avoid rate-limiting
# NOTE(review): not referenced by any code visible in this file — confirm
# whether it is still needed.
NUM_RESULTS_PER_PAGE = 10  # googlesearch default

# Telegram notification
# .env-style file; send_telegram() reads the `TELEGRAM_BOT_TOKEN=` line from it.
TELEGRAM_TOKEN_FILE = Path.home() / ".claude/channels/telegram/.env"
# Chat that receives the summary message.
TELEGRAM_CHAT_ID = "8286896218"

# Keywords to check. `keyword` is the search query; `target` is the site path
# stored alongside the result in data.json.
# NOTE: run() only looks for DOMAIN in the result URLs; `target` is recorded
# but never compared against them.
KEYWORDS = [
    {"keyword": "AI content repurposing", "target": "/en/blog/how-ai-content-repurposing-saves-creators-15-hours-per-week-EJFVKjg0"},
    {"keyword": "content repurposing strategy", "target": "/en/blog/one-video-three-platforms-the-science-behind-effective-content-repurposing"},
    {"keyword": "AI content quality score", "target": "/en/blog/ai-quality-scores-for-social-media-content-how-algorithms-predict-engagement"},
    {"keyword": "podcast to reels", "target": "/en/blog/podcast-to-reels-the-complete-guide-to-repurposing-audio-content-in-2026-J1su1tp8"},
    {"keyword": "best content repurposing tool", "target": "/en/blog/why-most-content-repurposing-tools-fail-and-what-to-look-for-instead"},
    {"keyword": "ROI AI content creation", "target": "/en/blog/the-roi-of-ai-powered-content-creation-numbers-every-marketing-manager-should-know"},
    {"keyword": "thalx AI video repurposing", "target": "/en"},
    {"keyword": "AI repurpose video content 2026", "target": "/en/blog"},
]

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def load_data() -> dict:
    """Load the persisted monitoring state from ``DATA_FILE``.

    Returns:
        The parsed contents of ``data.json``, or a fresh skeleton document
        (see ``_skeleton``) when the file does not exist yet.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON. This
            is deliberately not swallowed, so an unreadable file is never
            silently replaced by an empty skeleton on the next save.
    """
    try:
        # The file is written with ensure_ascii=False, so decode it explicitly
        # as UTF-8 instead of relying on the locale the job happens to run in.
        with open(DATA_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # First run: nothing has been saved yet. Opening directly (rather than
        # checking exists() first) avoids a check-then-open race.
        return _skeleton()


def _skeleton() -> dict:
    """Build a fresh data document used when no data.json exists yet.

    Every call returns new objects, so callers may mutate the result freely.
    Key order is kept stable because it determines the layout of the JSON
    file written later.
    """
    geo_score = {"current": None, "previous": None, "nextAudit": "2026-04-13"}
    indexation = {"sitemap": 10, "indexed": "pending", "newPosts": 6}
    platforms = dict(google=25, chatgpt=18, perplexity=15, gemini=22, bing=28)

    document = {"lastChecked": None, "nextCheck": None}
    document["geoScore"] = geo_score
    document["indexation"] = indexation
    # Collections start empty and are filled in by later runs.
    document["keywords"] = []
    document["posts"] = []
    document["platforms"] = platforms
    document["actionsCompleted"] = []
    document["actionsPending"] = []
    return document


def save_data(data: dict) -> None:
    """Persist ``data`` to ``DATA_FILE`` as pretty-printed UTF-8 JSON.

    The document is first written to a temporary file in the same directory
    and then moved over the destination with ``os.replace``. A run that is
    killed or fails mid-write therefore leaves the previous ``data.json``
    intact instead of a truncated file that would break every later run
    (and anything else reading the file).

    Args:
        data: JSON-serialisable monitoring state.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``data`` contains values ``json`` cannot serialise.
    """
    # Resolve symlinks so the real file is replaced, not the link itself.
    target = DATA_FILE.resolve()
    target.parent.mkdir(parents=True, exist_ok=True)

    # Same directory as the target, so os.replace never crosses filesystems.
    tmp_file = target.with_name(target.name + ".tmp")
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_file, target)
    except BaseException:
        # Do not leave a half-written temp file behind.
        tmp_file.unlink(missing_ok=True)
        raise


def find_keyword_entry(keywords_list: list, keyword: str) -> dict | None:
    for entry in keywords_list:
        if entry["keyword"] == keyword:
            return entry
    return None


def search_google(query: str, num_results: int = MAX_RESULTS) -> list[str]:
    """
    Run `query` through Google and collect the result URLs.

    Relies on the googlesearch-python package, asking it for `num_results`
    results. If the package is missing, an installation hint is printed to
    stderr and the process exits with status 1.

    A failure during the search is reported as a warning on stderr and is
    not raised: the URLs gathered up to that point (possibly none) are
    returned.

    Install: pip install googlesearch-python
    """
    try:
        from googlesearch import search as google_search
    except ImportError:
        install_hint = (
            "ERROR: googlesearch-python not installed.\n"
            "  Run: source ~/agents-claude-env/bin/activate && pip install googlesearch-python"
        )
        print(install_hint, file=sys.stderr)
        raise SystemExit(1)

    found = []
    try:
        hits = google_search(query, num_results=num_results, lang="en", sleep_interval=2)
        # Append one by one so results seen before a mid-search error are kept.
        for hit in hits:
            found.append(hit)
    except Exception as exc:
        print(f"  WARNING: Google search failed for '{query}': {exc}", file=sys.stderr)
    return found


def find_position(urls: list[str], domain: str) -> int | None:
    """Return 1-based position of first URL containing `domain`, or None."""
    for i, url in enumerate(urls, start=1):
        if domain in url:
            return i
    return None


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def _position_label(position: int | None) -> str:
    """Render a position for display; unranked keywords show as ``>MAX_RESULTS``."""
    return str(position) if position is not None else f">{MAX_RESULTS}"


def _movement_label(previous: int | None, current: int | None) -> str:
    """Describe how a ranking changed between the previous and current check.

    Returns ``"new"`` when there is no previous position to compare against,
    ``"out"`` when a previously ranked keyword is no longer found, and
    otherwise the signed change (``"+3"`` = up three places, ``"-2"`` = down
    two, ``"0"`` = unchanged).
    """
    if previous is None:
        return "new"
    if current is None:
        # Was ranked, now missing: a loss, not a "new" keyword.
        return "out"
    delta = previous - current  # positive = improved
    return f"+{delta}" if delta > 0 else str(delta)


def _record_position(entry: dict, target: str, position: int | None, date_str: str) -> str:
    """Store today's result on ``entry`` (in place) and return the movement label."""
    movement = _movement_label(entry.get("googlePosition"), position)
    entry["googlePosition"] = position
    entry["target"] = target  # refresh target in case it changed
    history = entry.setdefault("history", [])
    history.append({"date": date_str, "position": position})
    entry["history"] = history[-52:]  # keep the last 52 checks (one year of weekly runs)
    return movement


def _print_summary(results: list[tuple[str, int | None, str]], date_str: str) -> None:
    """Print the results table for the log."""
    ranked = sum(1 for _, position, _ in results if position is not None)
    print("\n" + "=" * 60)
    print(f"SEO Monitor — {DOMAIN} — {date_str}")
    print("=" * 60)
    print(f"{'Keyword':<45} {'Pos':>5} {'Move':>6}")
    print("-" * 60)
    for keyword, position, movement in results:
        print(f"{keyword:<45} {_position_label(position):>5} {movement:>6}")
    print("-" * 60)
    print(f"Ranked in top {MAX_RESULTS}: {ranked}/{len(results)}")
    print(f"Data saved to: {DATA_FILE}")


def _build_telegram_message(results: list[tuple[str, int | None, str]], date_str: str) -> str:
    """Build the Telegram summary (sent with ``parse_mode=HTML``)."""
    import html

    def esc(value: object) -> str:
        # Telegram's HTML mode requires <, > and & outside tags to be escaped;
        # this matters even for our own ">50" label.
        return html.escape(str(value), quote=False)

    wins = [(keyword, position) for keyword, position, _ in results if position is not None]
    lines = [f"📊 <b>SEO Monitor — {esc(DOMAIN)}</b>", f"📅 {date_str}", ""]
    for keyword, position, movement in results:
        icon = "🟢" if position is not None else "⚪"
        move_str = f" ({movement})" if movement != "new" else ""
        lines.append(f"{icon} {esc(keyword)}: <b>{esc(_position_label(position))}</b>{move_str}")
    lines.append(f"\n🎯 Top {MAX_RESULTS}: <b>{len(wins)}/{len(results)}</b>")
    if wins:
        lines.append("\n🏆 <b>Keywords rankeadas:</b>")
        lines.extend(f"  • {esc(keyword)} → posición {position}" for keyword, position in wins)
    return "\n".join(lines)


def _build_email_body(results: list[tuple[str, int | None, str]], date_str: str) -> str:
    """Build the plain-text email summary."""
    wins = [(keyword, position) for keyword, position, _ in results if position is not None]
    lines = [f"SEO Monitor — {DOMAIN} — {date_str}\n"]
    for keyword, position, movement in results:
        icon = "✅" if position is not None else "⬜"
        move_str = f" ({movement})" if movement != "new" else ""
        lines.append(f"{icon} {keyword}: {_position_label(position)}{move_str}")
    lines.append(f"\nTop {MAX_RESULTS}: {len(wins)}/{len(results)}")
    if wins:
        lines.append("\n🏆 Keywords rankeadas:")
        lines.extend(f"  • {keyword} → posición {position}" for keyword, position in wins)
    lines.append("\nDashboard: https://playgrounds.digitalhubassist.ai/seo-monitor-thalx/")
    return "\n".join(lines)


def run():
    """Check every configured keyword, persist the results and notify.

    For each entry in ``KEYWORDS`` this searches Google, records the position
    of ``DOMAIN`` (or ``None`` when it is not among the results) on the
    matching entry in data.json, and appends it to that entry's history.
    After saving, a summary is printed and sent via Telegram and email.

    NOTE: ``search_google`` returns whatever it collected when a search
    fails, so a failed search is indistinguishable from "not ranked" here and
    is recorded as ``None``.
    """
    now = datetime.now(timezone.utc)
    today_str = now.strftime("%Y-%m-%d")
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    data = load_data()
    data["lastChecked"] = now.strftime(timestamp_format)
    data["nextCheck"] = (now + timedelta(days=7)).strftime(timestamp_format)

    # Lookup of entries already stored in data.json, keyed by keyword.
    existing_kw = {e["keyword"]: e for e in data.get("keywords", [])}

    # (keyword, position or None, movement label) per checked keyword.
    results: list[tuple[str, int | None, str]] = []

    total = len(KEYWORDS)
    for index, kw in enumerate(KEYWORDS, start=1):
        keyword = kw["keyword"]
        target = kw["target"]
        print(f"[{index}/{total}] Searching: {keyword} ...", end=" ", flush=True)

        position = find_position(search_google(keyword), DOMAIN)

        # Find or create the keyword entry.
        entry = existing_kw.get(keyword)
        if entry is None:
            entry = {
                "keyword": keyword,
                "target": target,
                "googlePosition": None,
                "bingPosition": None,
                "history": [],
            }
            existing_kw[keyword] = entry

        movement = _record_position(entry, target, position, today_str)
        print(f"position={_position_label(position)} ({movement})")
        results.append((keyword, position, movement))

        # Pause between queries to be polite (not needed after the last one).
        if index < total:
            time.sleep(PAUSE_BETWEEN)

    # Configured keywords first, in KEYWORDS order; then any entries that
    # exist in data.json but are no longer configured, so history is kept.
    configured = list(dict.fromkeys(kw["keyword"] for kw in KEYWORDS))
    configured_set = set(configured)
    ordered = [existing_kw[k] for k in configured if k in existing_kw]
    extras = [e for k, e in existing_kw.items() if k not in configured_set]
    data["keywords"] = ordered + extras

    save_data(data)

    _print_summary(results, today_str)
    send_telegram(_build_telegram_message(results, today_str))
    send_email(
        f"📊 SEO Monitor — {DOMAIN} — {today_str}",
        _build_email_body(results, today_str),
    )


def send_email(subject: str, body: str) -> None:
    """Send monitoring results via AgentMail (milesofroberto@agentmail.to).

    Best-effort: any failure (missing ``agentmail`` package, network or API
    error) is reported on stdout and never raised, so a notification problem
    cannot fail the monitoring run.

    The API key is read from the ``AGENTMAIL_API_KEY`` environment variable.

    Args:
        subject: Email subject line.
        body: Plain-text email body.
    """
    api_key = os.environ.get("AGENTMAIL_API_KEY")
    if not api_key:
        # SECURITY: legacy fallback only. This key has been committed to
        # source control and must be treated as leaked: rotate it, provide
        # the new one via AGENTMAIL_API_KEY, then delete this literal.
        api_key = "am_us_8392836cd0a7490664687e31ca896b9f5cf3b18ebd392ca2ceebbad5f532dcf9"
        print("WARN: AGENTMAIL_API_KEY not set, using the API key embedded in the script")

    try:
        from agentmail import AgentMail
        client = AgentMail(api_key=api_key)
        client.inboxes.messages.send(
            "milesofroberto@agentmail.to",
            to="aguirrerjg@gmail.com",
            subject=subject,
            text=body
        )
        print("Email notification sent")
    except Exception as e:
        print(f"WARN: Email send failed: {e}")


def send_telegram(text: str) -> None:
    """Send monitoring results via Telegram.

    The bot token is read from the ``TELEGRAM_BOT_TOKEN=`` line of
    ``TELEGRAM_TOKEN_FILE`` (the last such line wins; surrounding quotes are
    removed). The message is posted to ``TELEGRAM_CHAT_ID`` with
    ``parse_mode=HTML``, so ``text`` must already be valid Telegram HTML.

    Best-effort: a missing token or any send failure is reported on stdout
    and never raised.

    Args:
        text: Message body in Telegram HTML markup.
    """
    try:
        token = None
        if TELEGRAM_TOKEN_FILE.exists():
            # errors="replace": stray bytes elsewhere in the file must not
            # prevent the token line from being read.
            content = TELEGRAM_TOKEN_FILE.read_text(encoding="utf-8", errors="replace")
            for line in content.splitlines():
                if line.startswith("TELEGRAM_BOT_TOKEN="):
                    # .env values are often quoted; the quotes are not part
                    # of the token.
                    token = line.split("=", 1)[1].strip().strip("'\"")
        if not token:
            print("WARN: No Telegram token found, skipping notification")
            return
        payload = urllib.parse.urlencode(
            {"chat_id": TELEGRAM_CHAT_ID, "text": text, "parse_mode": "HTML"}
        ).encode()
        req = urllib.request.Request(f"https://api.telegram.org/bot{token}/sendMessage", data=payload)
        # Context manager closes the HTTP response (and its socket) promptly.
        with urllib.request.urlopen(req, timeout=10):
            pass
        print("Telegram notification sent")
    except Exception as e:
        print(f"WARN: Telegram send failed: {e}")


# Script entry point: perform the ranking check only when this file is
# executed directly (e.g. by the cron job), not when it is imported.
if __name__ == "__main__":
    run()