# ruff: noqa: E501
import logging
import textwrap

from dapr.ext.workflow import WorkflowActivityContext

logger = logging.getLogger(__name__)


# Instruction templates are dedented once, *before* any value is interpolated,
# so multi-line titles, summaries, or context can never defeat the dedent.
_FIRST_ITERATION_INSTRUCTIONS = textwrap.dedent(
    """
    ## INSTRUCTIONS:
    - Start with a warm welcome to the podcast (as narrators of the research, not the
      original scientists).
    - Introduce the host and any participants (never highlight missing participants).
    - Mention the paper title you are discussing (you are not the author):
      {title}.
    - Summarize the planned discussion using the paper summary:
      {summary}.
    - Highlight why this paper matters and set an engaging tone.
    """
).strip()

_FINAL_ITERATION_INSTRUCTIONS = textwrap.dedent(
    """
    ## INSTRUCTIONS:
    - Conclude with a concise summary of the discussion.
    - Avoid phrases that imply restarting (e.g., "Welcome back" or "As we return").
    - Close with a natural, context-aware farewell (avoid generic "thanks for listening, goodbye").
    - Reflect on the key takeaways from the paper and the episode.
    - If WEB RESEARCH is provided, reference the most relevant findings before wrapping up.
    - Thank the listeners for joining and encourage them to explore the research further.
    """
).strip()

_MIDDLE_ITERATION_INSTRUCTIONS = textwrap.dedent(
    """
    ## INSTRUCTIONS:
    - Continue seamlessly without re-introducing the show.
    - Never use restart phrases (e.g., "Welcome back", "Listeners, today", "In this segment", "As we return")
    - Never use teasers such as "In the next segment" or "In upcoming episodes"; mid-segment dialogue should never reference future segments.
    - Build directly from the previous discussion and transition naturally.
    - Maintain continuity using the provided context.
    """
).strip()

# Bullets appended to the INSTRUCTIONS section of every iteration.
_COMMON_INSTRUCTIONS = textwrap.dedent(
    """
    - Use the provided TEXT and CONTEXT as the basis for this segment. Let TEXT drive the
      discussion while CONTEXT maintains continuity.
    - Refer to CONTEXT when needed to connect prior discussion points.
    - If a WEB RESEARCH section is present, reference it when it clarifies claims, adds context,
      or introduces timely developments relevant to the TEXT.
    - Alternate between speakers (if available) to keep the conversation dynamic.
    - Keep responses concise, relevant, and focused on the current idea.
    - Transition smoothly between topics and maintain logical progression.
    - Elaborate on key points without repeating earlier content.
    - Use natural, conversational language.
    - Never claim ownership of the research; avoid phrases such as "our methodology" or "we built"
      and instead reference the authors or the paper in third person.
    - When TEXT references tables, figures, or images, describe the insight in natural language
      (e.g., "a latency comparison table shows…") instead of saying "Table 2" or "Figure 1".
      Explain what the visual conveys and why it matters, keeping commentary focused and brief.
    """
).strip()


def generate_prompt(ctx: WorkflowActivityContext, input_data: dict) -> str:
    """
    Generate a prompt dynamically for the chunk.

    The prompt is assembled from newline-separated sections, in this order:
    CONTEXT, WEB RESEARCH (only when provided), PARTICIPANTS, INSTRUCTIONS
    (opening, closing, or intermediate bullets followed by the shared bullets),
    and TEXT.

    Args:
        ctx: Workflow activity context. Not used by this function.
        input_data: Mapping that may contain ``text``, ``context``,
            ``search_context``, ``participants``, ``doc_metadata`` (with
            ``title`` / ``summary``), ``iteration_index`` and
            ``total_iterations``. ``None`` is treated as an empty mapping.
            Missing or falsy text-like values are treated as empty; the two
            iteration counters default to 1 when the key is absent.

    Returns:
        The assembled prompt string.

    Raises:
        TypeError, ValueError: If ``iteration_index`` or ``total_iterations``
            is present but cannot be converted with ``int()``.
    """
    input_data = input_data or {}

    # ``or ""`` keeps an explicit ``None`` from being rendered as the text "None".
    text = str(input_data.get("text") or "").strip()
    context = str(input_data.get("context") or "").strip()
    search_context = str(input_data.get("search_context") or "").strip()
    participants = input_data.get("participants") or []
    doc_metadata = input_data.get("doc_metadata") or {}
    iteration_index = int(input_data.get("iteration_index", 1))
    total_iterations = int(input_data.get("total_iterations", 1))

    logger.info("Processing iteration %s of %s.", iteration_index, total_iterations)

    # Each entry is one section; they are joined with "\n" at the end so every
    # heading is guaranteed to start on its own line.
    sections = [
        "## CONTEXT\n"
        f"- Previous conversation: {context or 'No prior context available.'}\n"
        f"- This is iteration {iteration_index} of {total_iterations}."
    ]

    if search_context:
        sections.append(f"## WEB RESEARCH\n{search_context}")

    if participants:
        participant_names = ", ".join(str(name) for name in participants)
        sections.append(f"## PARTICIPANTS: {participant_names}")
    else:
        sections.append("## PARTICIPANTS: None (HOST-ONLY Conversation)")

    # The first-iteration check wins when there is only a single iteration.
    if iteration_index == 1:
        sections.append(
            _FIRST_ITERATION_INSTRUCTIONS.format(
                title=doc_metadata.get("title") or "Unknown Title",
                summary=doc_metadata.get("summary") or "No summary available.",
            )
        )
    elif iteration_index == total_iterations:
        sections.append(_FINAL_ITERATION_INSTRUCTIONS)
    else:
        sections.append(_MIDDLE_ITERATION_INSTRUCTIONS)

    sections.append(_COMMON_INSTRUCTIONS)
    sections.append(f"## TEXT:\n{text}")
    return "\n".join(sections)


# Prompt template asking a model to decide whether a paper is relevant to
# LLM / agentic security (offense or defense) and to justify the decision.
# Placeholders: {title}, {abstract}.
# NOTE(review): presumably rendered with str.format by the caller — if so, any
# literal braces added to this text later must be doubled; confirm at call site.
LLM_LABEL_PROMPT = """
# Role
You classify research papers focused on **LLM / agentic security**.

# Relevance Definition
A paper is relevant when it uses **LLMs, RAG, autonomous / agentic AI, or tool-using agents**
in a **security operations** context. Cover both attacking and defending AI systems.

## Offense (red teaming & attacking AI)
- Augmenting offensive security work with LLMs / agents.
- Research that targets AI systems (prompt injection, jailbreaks, adversarial examples, model
  extraction / poisoning, red-team frameworks, automated exploit discovery / evasion,
  social-engineering simulation, orchestration of offensive tools).

## Defense (blue teams & protecting AI)
- Using LLMs / agents for SOC, threat intel, detection engineering, IR, or threat hunting.
- Safeguarding AI systems in production (alert triage, query/log generation, enrichment,
  investigation planning, knowledge retrieval, autonomous response, guardrails / sandboxing,
  LLM security monitoring, model / prompt defenses).

Notes:
- Methods may involve prompting, tool calling, structured reasoning, RAG, graph / schema retrieval,
  autonomous planning, evaluators, or guardrails.
- Enabling work (datasets, evals, safety, infrastructure) is relevant when explicitly framed for
  LLM / agent security (offense, defense, or securing AI systems).

# Non-Relevance (Exclude)
- Generic AI/ML without a security-ops or AI-security angle.
- Traditional security (rules, signatures, heuristics) without LLMs / agents / RAG.
- Broad privacy, governance, or ethics without concrete LLM / agent security use cases.

# Inputs
- **TITLE:** {title}
- **ABSTRACT:** {abstract}

# Output Fields (handled by caller)
- **relevant:** true / false based on the criteria above.
- **reason:** concise sentence referencing abstract details. Mention the task, red vs. blue (or
  attacking vs. defending AI systems), and how LLMs / agents / RAG are used.

# Decision Rubric
1. Does the work use or analyze LLMs, RAG, or agentic systems? If no → likely false.
2. Is it tied to offensive or defensive security operations (including securing AI systems)?
   If yes → likely true.
3. For enabling work (evals, datasets, safety, infra): is the intended application clearly LLM /
   agent security (offense, defense, or protecting AI systems)? If yes → true, else false.
"""

# Prompt template asking a model to pick, from a batch of papers described as
# JSON objects (id / title / summary), the ids of those that apply GenAI or
# agentic systems to cybersecurity operations.
# Placeholder: {general_papers}.
# NOTE(review): presumably rendered with str.format — confirm at call site.
ANALYZE_PAPERS_PROMPT = """
# Instructions
You will receive JSON objects describing research papers. Each object includes:
- **id**: unique identifier
- **title**: paper title
- **summary**: abstract text

## Tasks
- Decide if the work applies GenAI/agentic systems to cybersecurity operations.
- Highlight papers where advanced AI meaningfully assists defenders or empowers attackers.

### Primary Focus
- Offensive strategies: AI-assisted pentesting, attack simulation, adversarial tactics.
- Defensive strategies: detection engineering, incident response, vulnerability assessment.

### Use Cases
- Explain how the AI approach helps security analysts mitigate threats, or how threat actors
  could weaponize it.

### Exclusion Criteria
- Skip generic AI topics without a concrete security-ops connection.
- Skip broad privacy / governance / ethics discussions without agent/LLM security content.

## Research Papers
{general_papers}

## Output
- Return the ids of papers that satisfy the criteria above.
- Ensure your reasoning references the instructions.
"""

# Prompt template framing podcast dialogue generation: show name, host,
# per-iteration turn limit, and general conversational rules.
# Placeholders: {podcast_name}, {host_name}, {max_rounds}, {prompt}.
# NOTE(review): {prompt} presumably receives the per-chunk text produced by
# generate_prompt() — confirm against the caller.
GENERATE_TRANSCRIPT_PROMPT = """
# PODCAST GENERATOR
- Generate a structured and engaging podcast dialogue based on the provided CONTEXT and TEXT.
- The podcast is titled '{podcast_name}' and is hosted by {host_name}.
- If participants are present, alternate naturally between the host and participants to ensure a dynamic and balanced conversational flow.
- Each participant (including the host) is limited to a maximum of {max_rounds} turns per iteration.
- A "round" consists of one turn by the host followed by one turn by a participant.
- If no participants are available, the host drives the conversation independently and should NOT mention the absence of other participants.
- Focus on maintaining a concise and coherent dialogue while progressively building upon the provided CONTEXT and TEXT.
- Ensure the conversation feels natural, engaging, and focused on the specified topics.
- Only the first iteration may include greetings; intermediate ones must not reboot the show.
- Simplify complex concepts to make them accessible and relatable to a general audience.
- When image descriptions or figure notes appear in the TEXT, integrate them naturally (what the visual shows and why it matters) without over-explaining.
{prompt}
"""

# Prompt template asking a model to produce an episode title, a short
# overview, and up to five key takeaways from a full episode transcript.
# Placeholders: {paper_id}, {transcript}.
# NOTE(review): presumably rendered with str.format — confirm at call site.
GENERATE_EPISODE_PROMPT = """
# EPISODE OVERVIEW GENERATOR
You will be provided with the full transcript of a podcast episode discussing a specific research paper. Your task is to create the following:

1. **Engaging Episode Title**: Craft a concise title (≤10 words) that captures the main essence of the episode. The title should be curiosity-inducing and accessible to a general audience. *Do not* copy or lightly rephrase the research paper title.
2. **Episode Overview**: Write a 3-4 sentence summary that references what the host(s) actually discussed. Highlight the key discussion arcs, citing speakers when relevant (e.g., “the host explains…”). Tie the summary to broader cybersecurity/AI implications.
3. **Key Takeaways**: Provide a concise list of up to **5 key insights**. Each takeaway (1-2 sentences) must:
   - Highlight a specific point raised in the episode.
   - Explain why it matters for security practitioners or researchers (e.g., an operational impact, defensive value, offensive risk).

## Instructions
- Use clear, conversational language; avoid jargon unless the transcript explicitly spells it out.
- Never claim ownership of the research (avoid “our model”, “we found”). Always describe findings in third person.
- Avoid referencing tables/figures by number (e.g., “Table 2”). Instead, describe what the visual showed in plain language.
- Ensure the tone is professional yet approachable, mirroring the podcast's conversational style.
- Structure the sections logically (Title → Overview → Key Takeaways) and make sure each takeaway stands on its own.

## Paper ID
{paper_id}

## Episode Transcript
{transcript}
"""

# Prompt template asking a model for 1-2 web-search queries (with matching
# rationales, as JSON) grounded in a paper excerpt.
# Placeholders: {date_time}, {context}.
# Unlike the other templates in this module, the text starts on the same line
# as the opening quotes, so the rendered prompt has no leading newline.
# NOTE(review): presumably rendered with str.format — confirm at call site.
QUERY_GENERATOR_PROMPT = """# Paper-Aware Web Search Query Generator
## Objective
Craft **1-2** precise web-search queries that help the reader deepen, validate, or expand the current discussion about the paper excerpt below.

## Requirements

- **Number of queries:** 1 to 2
- **Recency:** Assume today is **{date_time}**; capture timely angles when relevant.
- **Format:** Output valid JSON with a single object containing:
  - `queries`: array of query strings (1-2 items)
  - `rationales`: array of 1-2 sentence explanations (same length as `queries`).

## Instructions

1. Anchor every query in the CONTEXT—address unresolved claims, missing background, implementation details, evaluations, or real-world impacts mentioned in the paper segment.
2. Ensure each query targets a distinct facet (e.g., technique clarifications, related work, threat intel, tooling updates).
3. Keep each query concise (≤7 words), specific, and free of stop-words/filler.
4. Prefer actionable phrasing that a human analyst could paste directly into a search engine.

## Paper Context
{context}
"""
