# Source code for calute.agents.compaction_agent

# Copyright 2025 The EasyDeL/Calute Author @erfanzar (Erfan Zare Chavoshi).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Dedicated agent for intelligent context compaction through summarization.

This module provides the CompactionAgent class, which specializes in compacting
conversation context and message histories through intelligent summarization.
It helps manage context length in long-running conversations by creating concise
summaries while preserving critical information.

The agent supports features like:
- Multiple summary length modes (brief, concise, detailed)
- Topic preservation during summarization
- Message history compaction with recent message preservation
- Fallback truncation when LLM-based summarization fails
- Asynchronous LLM integration for summary generation

Typical usage example:
    from calute.agents.compaction_agent import CompactionAgent

    agent = CompactionAgent(
        llm_client=my_llm_client,
        target_length="concise"
    )

    # Summarize raw context
    summary = agent.summarize_context(long_context)

    # Compact message history
    compacted = agent.summarize_messages(messages, preserve_recent=3)
"""

from typing import Any


[docs]class CompactionAgent:
    """Agent specialized in compacting context through intelligent summarization.

    CompactionAgent provides intelligent context compaction capabilities using
    LLM-based summarization. It can summarize raw text context or entire message
    histories while preserving important information and recent interactions.

    The agent uses configurable length instructions to control output verbosity
    and supports topic preservation to ensure critical subjects are covered
    in summaries.

    Attributes:
        llm_client: LLM client instance used for generating summaries.
            Must support `generate_completion` method for async completion.
        target_length: Target summary verbosity level. One of:
            - 'brief': Extremely concise, 2-3 sentences
            - 'concise': Balanced, captures key points in paragraphs
            - 'detailed': Comprehensive, preserves context and decisions
        length_instructions: Dictionary mapping length modes to instruction prompts
            used to guide the LLM's summarization behavior.

    Example:
        >>> agent = CompactionAgent(llm_client=client, target_length="brief")
        >>> summary = agent.summarize_context("Long conversation text...")
        >>> print(summary)  # Returns 2-3 sentence summary
    """

    def __init__(self, llm_client: Any, target_length: str = "concise"):
        """Initialize the compaction agent.

        Sets up the compaction agent with an LLM client and configures the
        target summary length. Initializes the length instruction templates
        used for guiding summarization.

        Args:
            llm_client: LLM client instance for generating summaries. Should
                support `generate_completion` method with prompt, temperature,
                max_tokens, and stream parameters.
            target_length: Target summary length mode. Valid values are:
                - 'brief': Extremely brief, 2-3 sentences only
                - 'concise': Balanced summary with key points (default)
                - 'detailed': Detailed summary preserving context

        Note:
            If an unsupported target_length is provided, the agent will
            fall back to 'concise' mode during summarization.
        """
        self.llm_client = llm_client
        self.target_length = target_length

        self.length_instructions = {
            "brief": "Create an extremely brief summary in 2-3 sentences focusing only on the most critical information.",
            "concise": "Create a concise summary that captures the key points and important details in a few paragraphs.",
            "detailed": "Create a detailed summary that preserves important context, key decisions, and relevant details.",
        }

[docs]    def summarize_context(self, context: str, preserve_topics: list[str] | None = None) -> str:
        """Summarize context intelligently using LLM-based summarization.

        Uses the configured LLM client to generate an intelligent summary of
        the provided context. The summary respects the configured target_length
        setting and can preserve specific topics during compaction.

        Args:
            context: The raw text context to summarize. If empty or under
                200 characters, returns the original context unchanged.
            preserve_topics: Optional list of topic keywords that must be
                covered in the summary. These topics are explicitly mentioned
                in the summarization prompt to ensure coverage.

        Returns:
            str: Summarized context text. Returns original context if it's
                too short (< 200 chars) or if summarization fails.

        Raises:
            No exceptions are raised; errors fall back to truncation.

        Note:
            - Uses asyncio to run the async LLM completion synchronously
            - Falls back to _fallback_truncate if LLM call fails
            - Temperature is set to 0.3 for consistent, focused summaries
            - Max tokens is limited to 2048 for the summary response

        Example:
            >>> summary = agent.summarize_context(
            ...     "Long conversation about AI and machine learning...",
            ...     preserve_topics=["neural networks", "training data"]
            ... )
        """
        if not context or len(context) < 200:
            return context

        length_instruction = self.length_instructions.get(self.target_length, self.length_instructions["concise"])

        prompt = f"""You are a context compaction specialist. Your job is to summarize conversation context while preserving the most important information.

{length_instruction}

IMPORTANT GUIDELINES:
- Preserve key facts, decisions, and outcomes
- Maintain chronological order where relevant
- Keep technical details that are likely to be referenced later
- Remove redundant information and verbose explanations
- Use clear, direct language
"""

        if preserve_topics:
            prompt += f"\n- Ensure these topics are covered: {', '.join(preserve_topics)}"

        prompt += f"""

CONTEXT TO SUMMARIZE:
{context}

COMPACTED SUMMARY:"""

        try:
            if hasattr(self.llm_client, "generate_completion"):
                import asyncio

                try:
                    loop = asyncio.get_event_loop()
                except RuntimeError:
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)

                response = loop.run_until_complete(
                    self.llm_client.generate_completion(prompt=prompt, temperature=0.3, max_tokens=2048, stream=False)
                )

                if hasattr(response, "choices") and response.choices:
                    return response.choices[0].message.content
                elif hasattr(response, "content"):
                    return response.content
                elif hasattr(response, "text"):
                    return response.text
                elif isinstance(response, str):
                    return response
                return str(response)
            else:
                return self._fallback_truncate(context)

        except Exception as e:
            print(f"Error during summarization: {e}")
            import traceback

            traceback.print_exc()
            return self._fallback_truncate(context)

[docs]    def summarize_messages(
        self,
        messages: list[dict[str, str]],
        preserve_recent: int = 3,
    ) -> list[dict[str, str]]:
        """Summarize a list of messages into a compacted conversation history.

        Compacts a message history by summarizing older messages while preserving
        recent messages unchanged. System messages are always preserved separately.
        The summary is inserted as a special user message indicating it represents
        the previous conversation.

        Args:
            messages: List of message dictionaries with 'role' and 'content' keys.
                Roles typically include 'system', 'user', and 'assistant'.
            preserve_recent: Number of most recent non-system messages to keep
                unchanged. Defaults to 3. Set to 0 to summarize all messages.

        Returns:
            list[dict[str, str]]: Compacted message list containing:
                - All original system messages (preserved as-is)
                - One summary message with role 'user' containing the summary
                - The most recent `preserve_recent` messages unchanged

        Note:
            - If total messages <= preserve_recent + 1, returns original messages
            - System messages are separated and always preserved at the beginning
            - The summary message includes a header indicating how many messages
              were summarized: "[PREVIOUS CONVERSATION SUMMARY - N messages]"

        Example:
            >>> messages = [
            ...     {"role": "system", "content": "You are helpful."},
            ...     {"role": "user", "content": "Hello"},
            ...     {"role": "assistant", "content": "Hi there!"},
            ...     {"role": "user", "content": "Tell me about AI"},
            ...     {"role": "assistant", "content": "AI is..."},
            ... ]
            >>> compacted = agent.summarize_messages(messages, preserve_recent=2)
            >>> len(compacted)  # system + summary + 2 recent = 4 messages
            4
        """
        if len(messages) <= preserve_recent + 1:
            return messages

        system_messages = [m for m in messages if m.get("role") == "system"]
        other_messages = [m for m in messages if m.get("role") != "system"]

        recent_messages = other_messages[-preserve_recent:] if preserve_recent > 0 else []
        older_messages = other_messages[:-preserve_recent] if preserve_recent > 0 else other_messages

        if not older_messages:
            return messages

        context_parts = []
        for msg in older_messages:
            role = msg.get("role", "unknown")
            content = msg.get("content", "")
            context_parts.append(f"[{role.upper()}]: {content}")

        full_context = "\n\n".join(context_parts)

        summary = self.summarize_context(full_context)

        summary_message = {
            "role": "user",
            "content": f"[PREVIOUS CONVERSATION SUMMARY - {len(older_messages)} messages]:\n{summary}",
        }

        compacted = [*system_messages, summary_message, *recent_messages]

        return compacted

    def _fallback_truncate(self, context: str, max_chars: int = 2000) -> str:
        """Fallback truncation when LLM-based summarization fails.

        Performs a simple truncation of the context by keeping the first and
        last portions of the text. This ensures some context is preserved
        even when the LLM client is unavailable or encounters an error.

        Args:
            context: The context string to truncate.
            max_chars: Maximum total characters to keep. The result will
                contain roughly half from the beginning and half from the end.
                Defaults to 2000 characters.

        Returns:
            str: Truncated context with a marker indicating how many characters
                were removed. Returns original context if already within limit.

        Note:
            The truncation format is:
            "[first half]... [TRUNCATED N characters] ...[last half]"
            This preserves both the beginning (often containing setup/context)
            and the end (often containing conclusions/recent info).
        """
        if len(context) <= max_chars:
            return context

        half = max_chars // 2
        return context[:half] + f"\n\n... [TRUNCATED {len(context) - max_chars} characters] ...\n\n" + context[-half:]


def create_compaction_agent(llm_client: Any, target_length: str = "concise") -> CompactionAgent:
    """Build a CompactionAgent from the given client and length mode.

    A thin convenience wrapper: both arguments are forwarded, by keyword,
    to the CompactionAgent constructor and the new instance is returned.

    Args:
        llm_client: LLM client instance handed to the agent. Should support
            the `generate_completion` method for async completion.
        target_length: Summary length mode handed to the agent. Valid values:
            - 'brief': Extremely brief summaries (2-3 sentences)
            - 'concise': Balanced summaries with key points (default)
            - 'detailed': Comprehensive summaries preserving context

    Returns:
        CompactionAgent: The newly constructed agent.

    Example:
        >>> agent = create_compaction_agent(my_llm_client, "brief")
        >>> summary = agent.summarize_context("Long text...")
    """
    agent_options = {"llm_client": llm_client, "target_length": target_length}
    return CompactionAgent(**agent_options)