"""
LLM Client Module

Provides HTTP and mock inference backends for language model interactions.
"""

import json
import urllib.request
import urllib.error
from typing import Optional

from ..config import (
    LLM_MODE,
    LLM_HTTP_URL,
    LLM_TIMEOUT_SEC,
    LLM_MAX_TOKENS,
    LLM_FALLBACK_TO_MOCK,
    log_llm,
)
from .intent import guess_intent
from .prompts import LLM_SYSTEM_PROMPT


class LLMClient:
    """
    LLM inference client supporting HTTP and mock modes.

    Supports automatic fallback to mock responses when HTTP fails.
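
    Example (illustrative; mock mode needs no running server):

        client = LLMClient(mode="mock")
        result = client.infer({"latest_user_utterance": "How much does the CRM cost?"})
        # result includes assistant_reply, intent, needs_clarification, action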
    """

    def __init__(
        self,
        mode: str = LLM_MODE,
        http_url: str = LLM_HTTP_URL,
        timeout_sec: float = LLM_TIMEOUT_SEC,
        max_tokens: int = LLM_MAX_TOKENS,
        fallback_to_mock: bool = LLM_FALLBACK_TO_MOCK,
    ):
        """
        Initialize LLM client.

        Args:
            mode: Inference mode ('http' or 'mock')
            http_url: URL for HTTP inference endpoint
            timeout_sec: Request timeout in seconds
            max_tokens: Maximum response tokens
            fallback_to_mock: Whether to fall back to mock inference on HTTP failure
        """
        self.mode = mode
        self.http_url = http_url
        self.timeout_sec = timeout_sec
        self.max_tokens = max_tokens
        self.fallback_to_mock = fallback_to_mock

    def infer(self, payload: dict) -> dict:
        """
        Run LLM inference.

        Args:
            payload: Request payload containing:
                - system_prompt: System prompt string
                - history: List of conversation messages
                - latest_user_utterance: Current user input

        Returns:
            Response dict containing:
                - assistant_reply: Generated response text
                - intent: Detected intent
                - needs_clarification: Whether clarification is needed
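
        Example payload (illustrative):

            {
                "system_prompt": "You are a helpful sales assistant.",
                "history": [{"role": "user", "content": "Hi there"}],
                "latest_user_utterance": "Can I get a demo?",
            }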
        """
        if self.mode == "mock":
            return self._infer_mock(payload)

        if self.mode == "http":
            try:
                return self._infer_http(payload)
            except Exception as e:
                log_llm.error("HTTP inference failed: %r", e)
                if self.fallback_to_mock:
                    log_llm.warning("Falling back to mock inference")
                    return self._infer_mock(payload)
                raise

        return {"error": f"Unsupported mode: {self.mode}"}

    def _infer_http(self, payload: dict) -> dict:
        """HTTP inference via llama.cpp compatible API"""
        messages = [
            {"role": "system", "content": payload.get("system_prompt", LLM_SYSTEM_PROMPT)}
        ]

        for m in payload.get("history", []):
            messages.append({
                "role": m["role"],
                "content": m["content"]
            })

        # Ensure the latest user utterance is included (history may already end with it)
        latest = (payload.get("latest_user_utterance") or "").strip()
        if latest and messages[-1].get("content") != latest:
            messages.append({"role": "user", "content": latest})

        body = {
            "model": "qwen",
            "messages": messages,
            "temperature": 0.1,
            "max_tokens": self.max_tokens,
            "response_format": {"type": "json_object"}
        }

        # Retry, splitting the total timeout budget evenly across attempts
        max_retries = 2
        timeout_per_try = self.timeout_sec / max_retries

        for attempt in range(max_retries):
            try:
                req = urllib.request.Request(
                    self.http_url,
                    data=json.dumps(body).encode("utf-8"),
                    headers={"Content-Type": "application/json"},
                    method="POST",
                )

                with urllib.request.urlopen(req, timeout=timeout_per_try) as resp:
                    raw = resp.read().decode("utf-8", errors="replace")

                data = json.loads(raw)

                choices = data.get("choices")
                if not choices or not isinstance(choices, list):
                    raise ValueError(f"Invalid LLM response: {data}")

                content = (
                    choices[0]
                    .get("message", {})
                    .get("content", "")
                    .strip()
                )

                try:
                    parsed = json.loads(content)
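                    # Some responses put the reply under "question"; normalize to "assistant_reply"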
                    if "assistant_reply" not in parsed and "question" in parsed:
                        parsed["assistant_reply"] = parsed["question"]
                    return parsed
                except json.JSONDecodeError:
                    return {
                        "assistant_reply": content,
                        "needs_clarification": False,
                        "intent": "unknown"
                    }

            except (urllib.error.URLError, TimeoutError) as e:
                if attempt < max_retries - 1:
                    log_llm.warning(
                        "HTTP attempt %d/%d failed: %r, retrying...",
                        attempt + 1, max_retries, e
                    )
                    continue
                else:
                    raise

        return {"error": "All retry attempts failed"}

    def _infer_mock(self, payload: dict) -> dict:
        """Mock inference using keyword-based responses"""
        text = (payload.get("latest_user_utterance") or "").strip()
        t = text.lower()
        intent = guess_intent(t)

        needs_clarification = False
        assistant_reply = ""
        action = "log_only"

        # Domain-specific responses
        if intent == "closing":
            assistant_reply = "You're welcome! Have a great day."
            needs_clarification = False
            action = "end_call"

        elif intent == "erp_inquiry":
            assistant_reply = "Got it! Are you interested in our cloud ERP or on-premise solutions?"
            needs_clarification = True

        elif intent == "crm_inquiry":
            assistant_reply = "Perfect! Our CRM handles sales, marketing, and support. Which area interests you most?"
            needs_clarification = True

        elif intent == "crm_features":
            if "lead" in t or "conversion" in t or "opportunity" in t:
                assistant_reply = "Great! Our CRM tracks full lead-to-close pipeline with automated scoring. Want a demo?"
            else:
                assistant_reply = "Our CRM includes lead scoring, pipeline management, and forecasting. What's your priority?"
            needs_clarification = True

        elif intent == "pricing_inquiry":
            assistant_reply = "Our plans start at $99/user/month for CRM, $149 for ERP. How many users do you have?"
            needs_clarification = True

        elif intent == "implementation":
            assistant_reply = "We offer full implementation support with training. Timeline is typically 4-8 weeks. Sound good?"
            needs_clarification = True

        elif intent == "demo_request":
            assistant_reply = "I'd love to schedule a demo! What's your preferred time this week?"
            needs_clarification = True

        elif intent == "introduction":
            if "erp" in t or "crm" in t:
                assistant_reply = "Thanks for calling! I can help with that. What specific info do you need?"
                needs_clarification = False
            else:
                assistant_reply = "Thanks for calling! Are you interested in ERP, CRM, or both?"
                needs_clarification = True

        elif intent == "connectivity_issue":
            assistant_reply = "I understand. Is your internet not working at all, or just running slowly?"
            needs_clarification = True

        elif intent == "account_access_issue":
            assistant_reply = "Got it. Can you tell me what happens when you try to log in?"
            needs_clarification = True

        elif intent == "system_issue":
            assistant_reply = "I see. Can you describe exactly what's happening?"
            needs_clarification = True

        else:
            # Unknown intent - provide helpful options
            if any(k in t for k in ["want", "need", "looking for", "interested"]):
                assistant_reply = "I can help with ERP, CRM, pricing, or demos. Which interests you?"
            else:
                assistant_reply = "I'm here to help. Are you calling about ERP, CRM, or something else?"
            needs_clarification = True

        return {
            "normalized_text": text,
            "intent": intent,
            "needs_clarification": needs_clarification,
            "assistant_reply": assistant_reply,
            "action": action,
        }


# Global client instance
_client: Optional[LLMClient] = None


def get_client() -> LLMClient:
    """Get global LLM client instance"""
    global _client
    if _client is None:
        _client = LLMClient()
    return _client


def llm_infer(payload: dict) -> dict:
    """
    Convenience function for LLM inference.

    Uses the shared global client instance, creating it on first use.
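
    Example (illustrative):

        reply = llm_infer({"latest_user_utterance": "Do you offer implementation support?"})
        print(reply["assistant_reply"])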
    """
    return get_client().infer(payload)
