# Source code for calute.api_server.server

# Copyright 2025 The EasyDeL/Calute Author @erfanzar (Erfan Zare Chavoshi).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Main API server for the modular Calute API server.

This module provides the core API server infrastructure for Calute,
including:
- FastAPI-based HTTP server with OpenAI-compatible endpoints
- Agent registration and management
- Cortex multi-agent orchestration support
- Modular router architecture for different endpoint groups
- Completion services for both standard and Cortex agents

The server supports both standard Calute agents and Cortex agents
for multi-agent orchestration, with full compatibility with OpenAI
client libraries.
"""

from __future__ import annotations

from typing import Any

import uvicorn
from fastapi import FastAPI

from calute import Calute
from calute.cortex import CortexAgent
from calute.llms.base import BaseLLM
from calute.types import Agent

from .completion_service import CompletionService
from .cortex_completion_service import CortexCompletionService
from .routers import ChatRouter, HealthRouter, ModelsRouter


class CaluteAPIServer:
    """Modular FastAPI server that provides OpenAI-compatible API for Calute agents.

    This server exposes registered Calute agents through HTTP endpoints that
    follow the OpenAI API specification, allowing seamless integration with
    OpenAI client libraries.

    The server is designed with a modular architecture:
    - Separate routers for different endpoint groups
    - Dedicated service for completion logic
    - Message conversion utilities
    - Centralized models for request/response handling

    Attributes:
        calute: The Calute instance managing agents.
        llm: LLM instance for Cortex agents.
        agents: Dictionary mapping agent IDs to Agent objects.
        cortex_agents: List of registered CortexAgent instances.
        enable_cortex: Whether Cortex endpoints are enabled.
        app: FastAPI application instance.
        completion_service: Service for handling standard chat completions.
        cortex_completion_service: Service for handling Cortex completions.

    Example:
        >>> from calute import Calute
        >>> from calute.api_server import CaluteAPIServer
        >>>
        >>> calute = Calute(client=openai_client)
        >>> server = CaluteAPIServer(calute)
        >>> server.register_agent(my_agent)
        >>> server.run(port=8000)
    """

    def __init__(
        self,
        calute_instance: Calute | None = None,
        llm: BaseLLM | None = None,
        can_overide_samplings: bool = False,
        enable_cortex: bool = False,
        use_universal_agent: bool = True,
    ):
        """Initialize the API server.

        Sets up the FastAPI application, completion services, and optionally
        the Cortex multi-agent orchestration layer. If Cortex is enabled and
        an LLM is provided, the Cortex completion service and routers are
        initialized immediately. Otherwise, routers are deferred until the
        first agent is registered.

        Args:
            calute_instance: Optional ``Calute`` instance to use for standard
                agent management and execution. Required for registering
                standard agents via ``register_agent``.
            llm: Optional ``BaseLLM`` instance for powering Cortex agents.
                Required when ``enable_cortex`` is ``True``.
            can_overide_samplings: Whether to allow incoming request
                parameters (temperature, top_p, max_tokens, etc.) to override
                the agent's default sampling settings. Defaults to ``False``.
            enable_cortex: Whether to enable Cortex multi-agent orchestration
                endpoints. When ``True``, the server supports model names
                containing ``"cortex"`` for multi-agent workflows.
                Defaults to ``False``.
            use_universal_agent: Whether to include a ``UniversalAgent`` as a
                fallback agent in the Cortex agent pool. Only relevant when
                ``enable_cortex`` is ``True``. Defaults to ``True``.
        """
        self.calute = calute_instance
        self.llm = llm
        self.agents: dict[str, Agent] = {}
        self.cortex_agents: list[CortexAgent] = []
        self.enable_cortex = enable_cortex

        # One long-lived mapping handed to the models/health routers and
        # refreshed in place, so agents registered after router setup can
        # still show up there.
        # NOTE(review): this relies on the routers keeping a reference to the
        # mapping they are given rather than copying it -- confirm in .routers.
        self._model_registry: dict[str, Any] = {}

        title = "Calute API Server"
        if enable_cortex:
            title += " with Cortex"

        self.app = FastAPI(
            title=title,
            description="OpenAI-compatible API server for Calute agents with optional Cortex support",
            version="2.0.0",
        )

        if self.calute:
            self.completion_service = CompletionService(
                self.calute,
                can_overide_samplings=can_overide_samplings,
            )
        else:
            self.completion_service = None

        if enable_cortex and llm:
            self.cortex_completion_service = CortexCompletionService(
                llm=llm,
                agents=self.cortex_agents,
                use_universal_agent=use_universal_agent,
                verbose=True,
            )
        else:
            self.cortex_completion_service = None

        self._routers_initialized = False

        # With Cortex fully configured there is already something to serve,
        # so the routers can be mounted without waiting for an agent.
        if self.enable_cortex and self.cortex_completion_service:
            self._setup_routers()
            self._routers_initialized = True

    def register_agent(self, agent: Agent) -> None:
        """Register a standard agent to be available via the API.

        Adds the agent to both the Calute instance and the server's internal
        agent registry. The agent is stored under the first truthy value of
        its ``id``, ``name``, or ``model``. If routers have not yet been
        initialized, this method triggers router setup.

        Args:
            agent: The ``Agent`` instance to register. Should have at least
                one of ``id``, ``name``, or ``model`` set to serve as the
                lookup key in the agent registry.

        Raises:
            ValueError: If no ``Calute`` instance was provided during server
                initialization, since standard agents require Calute for
                execution.

        Example:
            >>> server = CaluteAPIServer(calute_instance=calute)
            >>> agent = Agent(id="assistant", model="gpt-4", instructions="Help users")
            >>> server.register_agent(agent)
        """
        if not self.calute:
            raise ValueError("Calute instance required for registering regular agents")

        self.calute.register_agent(agent)

        agent_key = agent.id or agent.name or agent.model
        self.agents[agent_key] = agent

        # Refresh the shared model mapping in place so an agent registered
        # after the routers were mounted is not missing from it.
        self._model_registry.update(self._get_all_models())

        if not self._routers_initialized:
            self._setup_routers()
            self._routers_initialized = True

    def register_cortex_agent(self, agent: CortexAgent) -> None:
        """Register a ``CortexAgent`` for multi-agent orchestration.

        Adds the agent to the Cortex agent pool. If the Cortex completion
        service has already been initialized, its agent list is updated
        immediately. If routers have not yet been initialized, this method
        triggers router setup.

        Args:
            agent: The ``CortexAgent`` instance to register.

        Raises:
            ValueError: If Cortex was not enabled during server
                initialization (i.e., ``enable_cortex=False``).

        Example:
            >>> server = CaluteAPIServer(llm=my_llm, enable_cortex=True)
            >>> cortex_agent = CortexAgent(name="researcher", llm=my_llm)
            >>> server.register_cortex_agent(cortex_agent)
        """
        if not self.enable_cortex:
            raise ValueError("Cortex must be enabled to register CortexAgents")

        self.cortex_agents.append(agent)

        if self.cortex_completion_service:
            self.cortex_completion_service.agents = self.cortex_agents

        if not self._routers_initialized:
            self._setup_routers()
            self._routers_initialized = True

    def _setup_routers(self) -> None:
        """Set up and include FastAPI routers for the API endpoints.

        Configures the appropriate routers based on which services are
        available and includes them in the FastAPI application:

        - ``UnifiedChatRouter``: Used when Cortex is enabled and its
          completion service exists. Receives both completion services.
        - ``ChatRouter``: Used otherwise, when a standard completion service
          exists and at least one agent is registered.
        - ``ModelsRouter`` / ``HealthRouter``: Included whenever at least one
          completion service is active. Both receive the server's shared
          model mapping.

        This method is called when the first agent is registered, during
        ``__init__`` if Cortex is pre-configured, or from ``run``.
        """
        # Imported here rather than at module level, as in the original code.
        from .routers import UnifiedChatRouter

        if self.enable_cortex and self.cortex_completion_service:
            unified_router = UnifiedChatRouter(
                agents=self.agents,
                completion_service=self.completion_service,
                cortex_completion_service=self.cortex_completion_service,
            )
            self.app.include_router(unified_router.router, tags=["chat"])
        elif self.completion_service and self.agents:
            chat_router = ChatRouter(self.agents, self.completion_service)
            self.app.include_router(chat_router.router, tags=["chat"])

        if self.completion_service or self.cortex_completion_service:
            # Pass the persistent mapping (not a throwaway copy) so later
            # registrations can be reflected by updating it in place.
            self._model_registry.update(self._get_all_models())
            models_router = ModelsRouter(self._model_registry)
            health_router = HealthRouter(self._model_registry)
            self.app.include_router(models_router.router, tags=["models"])
            self.app.include_router(health_router.router, tags=["health"])

    def _get_all_models(self) -> dict[str, Any]:
        """Get all available models including Cortex virtual models.

        Builds a new dictionary from the registered standard agents and, if
        Cortex is enabled, adds virtual model entries for each supported
        Cortex mode and process type. Each virtual model is emitted once per
        prefix (empty, ``"calute-"``, ``"api-"``, ``"v1-"``). A Cortex entry
        replaces a standard agent stored under the same name.

        Returns:
            Dictionary mapping model name strings to either the registered
            agent objects or configuration dictionaries (for Cortex virtual
            models) containing ``type``, ``mode``, and optionally ``process``
            keys.
        """
        models = dict(self.agents)

        if self.enable_cortex:
            cortex_base_models = {
                "cortex": {"type": "cortex", "mode": "instruction"},
                "cortex-instruct": {"type": "cortex", "mode": "instruction"},
                "cortex-task": {"type": "cortex", "mode": "task"},
                "cortex-task-parallel": {"type": "cortex", "mode": "task", "process": "parallel"},
                "cortex-task-hierarchical": {"type": "cortex", "mode": "task", "process": "hierarchical"},
            }
            prefixes = ["", "calute-", "api-", "v1-"]

            for prefix in prefixes:
                for model_name, config in cortex_base_models.items():
                    # An empty prefix simply yields the bare model name.
                    models[f"{prefix}{model_name}"] = config

        return models

    def run(self, host: str = "0.0.0.0", port: int = 11881, **kwargs) -> None:
        """Run the API server using uvicorn.

        Starts uvicorn with the configured FastAPI application. If routers
        have not been initialized yet, this method sets them up when Cortex
        is enabled and its completion service exists; otherwise it raises.

        Args:
            host: The hostname or IP address to bind the server to.
                Defaults to ``"0.0.0.0"`` (all interfaces).
            port: The TCP port number to bind the server to.
                Defaults to ``11881``.
            **kwargs: Additional keyword arguments passed directly to
                ``uvicorn.run()``.

        Raises:
            RuntimeError: If routers were never initialized and Cortex is not
                available, since the server would have no endpoints to serve.

        Example:
            >>> server = CaluteAPIServer(calute_instance=calute)
            >>> server.register_agent(agent)
            >>> server.run(host="127.0.0.1", port=8000, log_level="info")
        """
        if not self._routers_initialized:
            if self.enable_cortex and self.cortex_completion_service:
                self._setup_routers()
                self._routers_initialized = True
            else:
                raise RuntimeError(
                    "No agents registered. Please register at least one agent before starting the server."
                )

        uvicorn.run(self.app, host=host, port=port, **kwargs)

    @classmethod
    def create_server(
        cls,
        client: Any,
        agents: list[Agent] | None | Agent = None,
        can_overide_samplings: bool = False,
        **calute_kwargs,
    ) -> CaluteAPIServer:
        """Create a Calute API server with the given client and agents.

        Convenience factory that creates a ``Calute`` instance, wraps it in a
        server, and registers all provided agents. The returned server can be
        started with ``run()`` once at least one agent is registered.

        Args:
            client: Client object passed to the ``Calute`` constructor as
                ``client`` (e.g., ``openai.OpenAI(...)``).
            agents: A single ``Agent`` instance or a list of ``Agent``
                instances to register with the server. If ``None`` or empty,
                no agents are registered and they must be added later via
                ``register_agent()``.
            can_overide_samplings: Whether to allow incoming request
                parameters (temperature, top_p, max_tokens, etc.) to override
                the agent's default sampling settings. Defaults to ``False``.
            **calute_kwargs: Additional keyword arguments passed directly to
                the ``Calute`` constructor.

        Returns:
            A server instance with all provided agents registered.

        Example:
            >>> import openai
            >>> from calute.types import Agent
            >>> from calute.api_server import CaluteAPIServer
            >>>
            >>> client = openai.OpenAI(api_key="key", base_url="url")
            >>> agent = Agent(id="assistant", model="gpt-4", instructions="Help users")
            >>> server = CaluteAPIServer.create_server(client, agents=[agent])
            >>> server.run(port=8000)
        """
        calute = Calute(client=client, **calute_kwargs)

        # Keyword arguments are required here: the second positional
        # parameter of ``__init__`` is ``llm``, so passing the flag
        # positionally would bind it to ``llm`` and leave sampling overrides
        # disabled. ``cls`` keeps the factory usable from subclasses.
        server = cls(
            calute_instance=calute,
            can_overide_samplings=can_overide_samplings,
        )

        if isinstance(agents, Agent):
            agents = [agents]

        if agents:
            for agent in agents:
                server.register_agent(agent)

        return server