Source code for calute.agents.compaction_agent

# Copyright 2025 The EasyDeL/Calute Author @erfanzar (Erfan Zare Chavoshi).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Dedicated agent for intelligent context compaction through summarization.

This module provides the CompactionAgent class, which specializes in compacting
conversation context and message histories through intelligent summarization.
It helps manage context length in long-running conversations by creating concise
summaries while preserving critical information.

The agent supports features like:
- Multiple summary length modes (brief, concise, detailed)
- Topic preservation during summarization
- Message history compaction with recent message preservation
- Fallback truncation when LLM-based summarization fails
- Asynchronous LLM integration for summary generation

Typical usage example:
    from calute.agents.compaction_agent import CompactionAgent

    agent = CompactionAgent(
        llm_client=my_llm_client,
        target_length="concise"
    )

    # Summarize raw context
    summary = agent.summarize_context(long_context)

    # Compact message history
    compacted = agent.summarize_messages(messages, preserve_recent=3)
"""

from typing import Any


[docs]class CompactionAgent: """Agent specialized in compacting context through intelligent summarization. CompactionAgent provides intelligent context compaction capabilities using LLM-based summarization. It can summarize raw text context or entire message histories while preserving important information and recent interactions. The agent uses configurable length instructions to control output verbosity and supports topic preservation to ensure critical subjects are covered in summaries. Attributes: llm_client: LLM client instance used for generating summaries. Must support `generate_completion` method for async completion. target_length: Target summary verbosity level. One of: - 'brief': Extremely concise, 2-3 sentences - 'concise': Balanced, captures key points in paragraphs - 'detailed': Comprehensive, preserves context and decisions length_instructions: Dictionary mapping length modes to instruction prompts used to guide the LLM's summarization behavior. Example: >>> agent = CompactionAgent(llm_client=client, target_length="brief") >>> summary = agent.summarize_context("Long conversation text...") >>> print(summary) # Returns 2-3 sentence summary """ def __init__(self, llm_client: Any, target_length: str = "concise"): """Initialize the compaction agent. Sets up the compaction agent with an LLM client and configures the target summary length. Initializes the length instruction templates used for guiding summarization. Args: llm_client: LLM client instance for generating summaries. Should support `generate_completion` method with prompt, temperature, max_tokens, and stream parameters. target_length: Target summary length mode. Valid values are: - 'brief': Extremely brief, 2-3 sentences only - 'concise': Balanced summary with key points (default) - 'detailed': Detailed summary preserving context Note: If an unsupported target_length is provided, the agent will fall back to 'concise' mode during summarization. """ self.llm_client = llm_client self.target_length = target_length self.length_instructions = { "brief": "Create an extremely brief summary in 2-3 sentences focusing only on the most critical information.", "concise": "Create a concise summary that captures the key points and important details in a few paragraphs.", "detailed": "Create a detailed summary that preserves important context, key decisions, and relevant details.", }
[docs] def summarize_context(self, context: str, preserve_topics: list[str] | None = None) -> str: """Summarize context intelligently using LLM-based summarization. Uses the configured LLM client to generate an intelligent summary of the provided context. The summary respects the configured target_length setting and can preserve specific topics during compaction. Args: context: The raw text context to summarize. If empty or under 200 characters, returns the original context unchanged. preserve_topics: Optional list of topic keywords that must be covered in the summary. These topics are explicitly mentioned in the summarization prompt to ensure coverage. Returns: str: Summarized context text. Returns original context if it's too short (< 200 chars) or if summarization fails. Raises: No exceptions are raised; errors fall back to truncation. Note: - Uses asyncio to run the async LLM completion synchronously - Falls back to _fallback_truncate if LLM call fails - Temperature is set to 0.3 for consistent, focused summaries - Max tokens is limited to 2048 for the summary response Example: >>> summary = agent.summarize_context( ... "Long conversation about AI and machine learning...", ... preserve_topics=["neural networks", "training data"] ... ) """ if not context or len(context) < 200: return context length_instruction = self.length_instructions.get(self.target_length, self.length_instructions["concise"]) prompt = f"""You are a context compaction specialist. Your job is to summarize conversation context while preserving the most important information. {length_instruction} IMPORTANT GUIDELINES: - Preserve key facts, decisions, and outcomes - Maintain chronological order where relevant - Keep technical details that are likely to be referenced later - Remove redundant information and verbose explanations - Use clear, direct language """ if preserve_topics: prompt += f"\n- Ensure these topics are covered: {', '.join(preserve_topics)}" prompt += f""" CONTEXT TO SUMMARIZE: {context} COMPACTED SUMMARY:""" try: if hasattr(self.llm_client, "generate_completion"): import asyncio try: loop = asyncio.get_event_loop() except RuntimeError: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) response = loop.run_until_complete( self.llm_client.generate_completion(prompt=prompt, temperature=0.3, max_tokens=2048, stream=False) ) if hasattr(response, "choices") and response.choices: return response.choices[0].message.content elif hasattr(response, "content"): return response.content elif hasattr(response, "text"): return response.text elif isinstance(response, str): return response return str(response) else: return self._fallback_truncate(context) except Exception as e: print(f"Error during summarization: {e}") import traceback traceback.print_exc() return self._fallback_truncate(context)
[docs] def summarize_messages( self, messages: list[dict[str, str]], preserve_recent: int = 3, ) -> list[dict[str, str]]: """Summarize a list of messages into a compacted conversation history. Compacts a message history by summarizing older messages while preserving recent messages unchanged. System messages are always preserved separately. The summary is inserted as a special user message indicating it represents the previous conversation. Args: messages: List of message dictionaries with 'role' and 'content' keys. Roles typically include 'system', 'user', and 'assistant'. preserve_recent: Number of most recent non-system messages to keep unchanged. Defaults to 3. Set to 0 to summarize all messages. Returns: list[dict[str, str]]: Compacted message list containing: - All original system messages (preserved as-is) - One summary message with role 'user' containing the summary - The most recent `preserve_recent` messages unchanged Note: - If total messages <= preserve_recent + 1, returns original messages - System messages are separated and always preserved at the beginning - The summary message includes a header indicating how many messages were summarized: "[PREVIOUS CONVERSATION SUMMARY - N messages]" Example: >>> messages = [ ... {"role": "system", "content": "You are helpful."}, ... {"role": "user", "content": "Hello"}, ... {"role": "assistant", "content": "Hi there!"}, ... {"role": "user", "content": "Tell me about AI"}, ... {"role": "assistant", "content": "AI is..."}, ... ] >>> compacted = agent.summarize_messages(messages, preserve_recent=2) >>> len(compacted) # system + summary + 2 recent = 4 messages 4 """ if len(messages) <= preserve_recent + 1: return messages system_messages = [m for m in messages if m.get("role") == "system"] other_messages = [m for m in messages if m.get("role") != "system"] recent_messages = other_messages[-preserve_recent:] if preserve_recent > 0 else [] older_messages = other_messages[:-preserve_recent] if preserve_recent > 0 else other_messages if not older_messages: return messages context_parts = [] for msg in older_messages: role = msg.get("role", "unknown") content = msg.get("content", "") context_parts.append(f"[{role.upper()}]: {content}") full_context = "\n\n".join(context_parts) summary = self.summarize_context(full_context) summary_message = { "role": "user", "content": f"[PREVIOUS CONVERSATION SUMMARY - {len(older_messages)} messages]:\n{summary}", } compacted = [*system_messages, summary_message, *recent_messages] return compacted
def _fallback_truncate(self, context: str, max_chars: int = 2000) -> str: """Fallback truncation when LLM-based summarization fails. Performs a simple truncation of the context by keeping the first and last portions of the text. This ensures some context is preserved even when the LLM client is unavailable or encounters an error. Args: context: The context string to truncate. max_chars: Maximum total characters to keep. The result will contain roughly half from the beginning and half from the end. Defaults to 2000 characters. Returns: str: Truncated context with a marker indicating how many characters were removed. Returns original context if already within limit. Note: The truncation format is: "[first half]... [TRUNCATED N characters] ...[last half]" This preserves both the beginning (often containing setup/context) and the end (often containing conclusions/recent info). """ if len(context) <= max_chars: return context half = max_chars // 2 return context[:half] + f"\n\n... [TRUNCATED {len(context) - max_chars} characters] ...\n\n" + context[-half:]
[docs]def create_compaction_agent(llm_client: Any, target_length: str = "concise") -> CompactionAgent: """Factory function to create a compaction agent. Convenience factory for creating CompactionAgent instances with the specified configuration. Provides a simple interface for agent creation without needing to import and instantiate the class directly. Args: llm_client: LLM client instance for generating summaries. Should support the `generate_completion` method for async completion. target_length: Target summary length mode. Valid values are: - 'brief': Extremely brief summaries (2-3 sentences) - 'concise': Balanced summaries with key points (default) - 'detailed': Comprehensive summaries preserving context Returns: CompactionAgent: Configured compaction agent instance ready for use. Example: >>> agent = create_compaction_agent(my_llm_client, "brief") >>> summary = agent.summarize_context("Long text...") """ return CompactionAgent(llm_client=llm_client, target_length=target_length)