Source code for calute.tools.duckduckgo_engine

# Copyright 2025 The EasyDeL/Calute Author @erfanzar (Erfan Zare Chavoshi).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""DuckDuckGo search engine integration for Calute agents.

This module provides a comprehensive DuckDuckGo search tool for Calute agents,
enabling web searches with advanced filtering and customization options. It includes:
- Text, image, video, news, and map searches
- Domain and keyword filtering
- Safe search and time range filtering
- Multi-source search across different content types
- Search suggestions and query translation
- Lazy loading of dependencies to avoid import errors

The search tool is implemented as an AgentBaseFn subclass for seamless
integration with Calute agents. It depends on the `ddgs` package, which is
included in Calute's core runtime dependencies.

Example:
    >>> from calute.tools.duckduckgo_engine import DuckDuckGoSearch
    >>> results = DuckDuckGoSearch.static_call("Python programming", n_results=5)
    >>> news = DuckDuckGoSearch.static_call("AI news", search_type="news")
"""

import typing as tp
from datetime import datetime
from typing import Literal

from ..types import AgentBaseFn

_DDGS = None
_DDGS_AVAILABLE = None


def _get_ddgs():
    """Lazy import of DDGS to avoid crashing if the environment is incomplete.

    Returns:
        The DDGS class from the ddgs package.

    Raises:
        ImportError: If the ddgs package is not installed.
    """
    global _DDGS, _DDGS_AVAILABLE
    if _DDGS_AVAILABLE is None:
        try:
            from ddgs import DDGS

            _DDGS = DDGS
            _DDGS_AVAILABLE = True
        except ModuleNotFoundError:
            _DDGS_AVAILABLE = False
    if not _DDGS_AVAILABLE:
        raise ImportError("`ddgs` package is required but missing from the environment.")
    return _DDGS


[docs]class DuckDuckGoSearch(AgentBaseFn): """DuckDuckGo search tool for web, image, video, news, and map searches. Provides comprehensive search capabilities through the DuckDuckGo API with support for filtering, safe search, time limits, and domain restrictions. Implements lazy loading of the ddgs package. Attributes: SearchType: Literal type for search categories (text, images, videos, news, maps). TimeFilter: Literal type for time range filtering (day, week, month, year, None). SafeSearch: Literal type for safe search levels (strict, moderate, off). Methods: static_call: Perform a search with full filtering options. search_multiple_sources: Search across multiple content types. get_suggestions: Get search query suggestions. translate_query: Translate a query to another language. Example: >>> results = DuckDuckGoSearch.static_call( ... query="machine learning", ... search_type="text", ... n_results=10, ... timelimit="month" ... ) """ SearchType = Literal["text", "images", "videos", "news", "maps"] TimeFilter = Literal["day", "week", "month", "year", None] SafeSearch = Literal["strict", "moderate", "off"] @staticmethod def _maybe_truncate(text: str, limit: int | None) -> str: """Return the full text if limit is None, else the first `limit` chars. Args: text: The text to potentially truncate. limit: Maximum character limit, or None for no limit. Returns: The original text or truncated version. """ return text if limit is None else text[:limit] @staticmethod def _filter_by_domain(results: list[dict], domains: list[str] | None) -> list[dict]: """Filter results to only include specified domains. Args: results: List of search result dictionaries. domains: List of domain strings to filter by, or None. Returns: Filtered list containing only results from specified domains. """ if not domains: return results filtered = [] for result in results: url = result.get("url", "") if any(domain in url for domain in domains): filtered.append(result) return filtered @staticmethod def _filter_by_keywords(results: list[dict], keywords: list[str] | None, exclude: bool = False) -> list[dict]: """Filter results by keywords in title or snippet. Args: results: List of search result dictionaries. keywords: List of keywords to filter by, or None. exclude: If True, exclude results containing keywords. Returns: Filtered list based on keyword presence. """ if not keywords: return results filtered = [] for result in results: text = (result.get("title", "") + " " + result.get("snippet", "")).lower() has_keyword = any(keyword.lower() in text for keyword in keywords) if (has_keyword and not exclude) or (not has_keyword and exclude): filtered.append(result) return filtered @staticmethod def _append_text_results( results: list[dict], search_results: tp.Iterable[dict], n_results: int | None, title_length_limit: int | None, snippet_length_limit: int | None, ) -> None: """Normalize DuckDuckGo text results into the shared result shape.""" for r in search_results: results.append( { "title": DuckDuckGoSearch._maybe_truncate(r.get("title", ""), title_length_limit), "url": r.get("href", ""), "snippet": DuckDuckGoSearch._maybe_truncate(r.get("body", ""), snippet_length_limit), "source": "DuckDuckGo", } ) if n_results and len(results) >= n_results: break @staticmethod def _is_no_results_error(error: Exception) -> bool: """Return True when the provider error means an empty result set.""" return "no results found" in str(error).lower()
[docs] @staticmethod def static_call( query: str, search_type: SearchType = "text", n_results: int | None = 5, title_length_limit: int | None = 200, snippet_length_limit: int | None = 1_000, region: str = "us-en", safesearch: SafeSearch = "moderate", timelimit: TimeFilter = None, allowed_domains: list[str] | None = None, excluded_domains: list[str] | None = None, must_include_keywords: list[str] | None = None, exclude_keywords: list[str] | None = None, file_type: str | None = None, return_metadata: bool = False, **context_variables, ) -> list[dict] | dict: """ Perform an enhanced DuckDuckGo search with multiple options and filters. Use this tool when the model needs fresh public-web information rather than local workspace context. It supports regular text search plus images, videos, news, and maps. The tool normalizes provider output into compact result dictionaries so a model can scan titles, snippets, URLs, and metadata without scraping a page first. Args: query (str): Search keywords. The query can be plain language or can include search-style qualifiers. If ``file_type`` or domain filters are provided, they are merged into the outgoing query automatically. search_type (SearchType): Search vertical to use: ``"text"``, ``"images"``, ``"videos"``, ``"news"``, or ``"maps"``. n_results (int, optional): Number of results to return. Must be between 1 and 30. title_length_limit (int | None): Maximum number of characters kept from the result title. Set to ``None`` to keep titles in full. snippet_length_limit (int | None): Maximum number of characters kept from result body text or summary fields. Set to ``None`` to keep snippets in full. region (str): Region code such as ``"us-en"``, ``"uk-en"``, or ``"fr-fr"``. safesearch (SafeSearch): Safe-search level: ``"strict"``, ``"moderate"``, or ``"off"``. timelimit (TimeFilter): Optional recency filter such as ``"day"``, ``"week"``, ``"month"``, or ``"year"``. This is especially useful for news and fast-moving topics. allowed_domains (list[str] | None): Restrict results to these domains. This is implemented both by expanding the query and by filtering the returned URLs. excluded_domains (list[str] | None): Remove results from these domains. must_include_keywords (list[str] | None): Keep only results whose title or snippet contains at least one of the provided keywords. exclude_keywords (list[str] | None): Remove results whose title or snippet contains any of the provided keywords. file_type (str | None): Add a file-type constraint such as ``"pdf"`` or ``"doc"`` to the search query. return_metadata (bool): When ``True``, return a dictionary with ``results`` and additional metadata such as the final query string, timestamp, and filters applied. When ``False``, return only the results list. Returns: Union[list[dict], dict]: Either a list of result dictionaries or a metadata wrapper containing ``results`` and search context. Result items typically include keys such as ``title``, ``snippet``, ``url``, and type-specific fields like image source or publication date. """ if not query.strip(): raise ValueError("Query string must be non-empty") if isinstance(n_results, str): try: n_results = int(n_results) except ValueError: ... if n_results is not None and not (1 <= n_results <= 30): raise ValueError("n_results must be 1-30") if file_type: query = f"{query} filetype:{file_type}" if allowed_domains: site_query = " OR ".join(f"site:{domain}" for domain in allowed_domains) query = f"{query} ({site_query})" if excluded_domains: for domain in excluded_domains: query = f"{query} -site:{domain}" results: list[dict] = [] search_metadata = { "query": query, "search_type": search_type, "timestamp": datetime.now().isoformat(), "filters_applied": { "region": region, "safesearch": safesearch, "timelimit": timelimit, "file_type": file_type, "allowed_domains": allowed_domains, "excluded_domains": excluded_domains, }, } with _get_ddgs()() as ddgs: if search_type == "text": search_results = ddgs.text( query, region=region, safesearch=safesearch.capitalize() if safesearch else "Moderate", timelimit=timelimit, ) DuckDuckGoSearch._append_text_results( results, search_results, n_results=n_results, title_length_limit=title_length_limit, snippet_length_limit=snippet_length_limit, ) elif search_type == "images": search_results = ddgs.images( query, region=region, safesearch=safesearch.capitalize() if safesearch else "Moderate", timelimit=timelimit, ) for r in search_results: results.append( { "title": DuckDuckGoSearch._maybe_truncate(r.get("title", ""), title_length_limit), "url": r.get("url", ""), "image_url": r.get("image", ""), "thumbnail": r.get("thumbnail", ""), "source": r.get("source", ""), "width": r.get("width", 0), "height": r.get("height", 0), } ) if n_results and len(results) >= n_results: break elif search_type == "videos": search_results = ddgs.videos( query, region=region, safesearch=safesearch.capitalize() if safesearch else "Moderate", timelimit=timelimit, ) for r in search_results: results.append( { "title": DuckDuckGoSearch._maybe_truncate(r.get("title", ""), title_length_limit), "url": r.get("content", ""), "description": DuckDuckGoSearch._maybe_truncate( r.get("description", ""), snippet_length_limit ), "duration": r.get("duration", ""), "uploader": r.get("uploader", ""), "published": r.get("published", ""), "thumbnail": r.get("thumbnail", ""), } ) if n_results and len(results) >= n_results: break elif search_type == "news": news_safesearch = safesearch.lower() if safesearch else "moderate" if news_safesearch == "strict" and timelimit: news_safesearch = "moderate" news_failed_with_no_results = False try: search_results = ddgs.news( query, region=region, safesearch=news_safesearch, timelimit=timelimit, ) for r in search_results: results.append( { "title": DuckDuckGoSearch._maybe_truncate(r.get("title", ""), title_length_limit), "url": r.get("url", ""), "snippet": DuckDuckGoSearch._maybe_truncate(r.get("body", ""), snippet_length_limit), "source": r.get("source", ""), "date": r.get("date", ""), "image": r.get("image", ""), } ) if n_results and len(results) >= n_results: break except Exception as exc: if not DuckDuckGoSearch._is_no_results_error(exc): raise news_failed_with_no_results = True if news_failed_with_no_results or not results: search_metadata["fallback_applied"] = "news_to_text" search_metadata["effective_search_type"] = "text" search_results = ddgs.text( query, region=region, safesearch=safesearch.capitalize() if safesearch else "Moderate", timelimit=timelimit, ) DuckDuckGoSearch._append_text_results( results, search_results, n_results=n_results, title_length_limit=title_length_limit, snippet_length_limit=snippet_length_limit, ) elif search_type == "maps": search_results = ddgs.maps(query, place=region.split("-")[0] if region else None) for r in search_results: results.append( { "title": DuckDuckGoSearch._maybe_truncate(r.get("title", ""), title_length_limit), "address": r.get("address", ""), "country": r.get("country", ""), "city": r.get("city", ""), "phone": r.get("phone", ""), "latitude": r.get("latitude", ""), "longitude": r.get("longitude", ""), "url": r.get("url", ""), "desc": DuckDuckGoSearch._maybe_truncate(r.get("desc", ""), snippet_length_limit), "hours": r.get("hours", {}), } ) if n_results and len(results) >= n_results: break if must_include_keywords: results = DuckDuckGoSearch._filter_by_keywords(results, must_include_keywords, exclude=False) if exclude_keywords: results = DuckDuckGoSearch._filter_by_keywords(results, exclude_keywords, exclude=True) search_metadata.setdefault("effective_search_type", search_type) search_metadata["total_results"] = len(results) search_metadata["filters_applied"]["keyword_filters"] = { "must_include": must_include_keywords, "exclude": exclude_keywords, } if return_metadata: return {"results": results, "metadata": search_metadata} return results
[docs] @staticmethod def search_multiple_sources( query: str, sources: list[SearchType] | None = None, n_results_per_source: int = 3, **kwargs, ) -> dict[str, list[dict]]: """Search across multiple source types and return categorized results. Performs separate searches for each specified source type and aggregates the results into a single dictionary keyed by source. Errors for individual sources are captured without failing the entire operation. Args: query: The search query string to use across all sources. sources: List of search types to query. Each must be one of "text", "images", "videos", "news", "maps". Defaults to ["text", "news"] if None. n_results_per_source: Maximum number of results to return per source type. Defaults to 3. **kwargs: Additional keyword arguments forwarded to ``static_call`` (e.g., region, safesearch, timelimit). Returns: A dictionary mapping source type names to their respective result lists. If a source fails, its value is a dict with an "error" key describing the failure. Example: >>> results = DuckDuckGoSearch.search_multiple_sources( ... "Python programming", ... sources=["text", "news"], ... n_results_per_source=3 ... ) >>> print(len(results["text"])) 3 """ if sources is None: sources = ["text", "news"] all_results = {} for source in sources: try: results = DuckDuckGoSearch.static_call( query=query, search_type=source, n_results=n_results_per_source, **kwargs ) all_results[source] = results except Exception as e: all_results[source] = {"error": str(e)} return all_results
[docs] @staticmethod def get_suggestions(query: str, region: str = "us-en", **context_variables) -> list[str]: """Get search query suggestions (autocomplete) for a partial query. Retrieves search suggestions from DuckDuckGo's suggestion API, useful for expanding or refining queries before performing a full search. Args: query: Partial or full search query to get suggestions for. region: Region code for localized suggestions (e.g., "us-en", "uk-en", "de-de"). Defaults to "us-en". **context_variables: Runtime context from the agent (unused). Returns: A list of suggested search query strings. Returns an empty list if no suggestions are available or if the request fails. Example: >>> suggestions = DuckDuckGoSearch.get_suggestions("python prog") >>> print(suggestions) ['python programming', 'python programming language', ...] """ suggestions = [] with _get_ddgs()() as ddgs: try: results = ddgs.suggestions(query, region=region) suggestions = [r.get("phrase", "") for r in results if r.get("phrase")] except Exception as e: import logging logging.getLogger(__name__).debug(f"Failed to get suggestions for '{query}': {e}") return suggestions
[docs] @staticmethod def translate_query(query: str, to_language: str = "en", **context_variables) -> str: """Translate a search query to another language using DuckDuckGo. Uses DuckDuckGo's translation service to convert a query from its detected language to the specified target language. Falls back to returning the original query if translation fails. Args: query: The original search query to translate. to_language: Target language code (e.g., "en" for English, "es" for Spanish, "fr" for French, "de" for German). Defaults to "en". **context_variables: Runtime context from the agent (unused). Returns: The translated query string. If translation fails, returns the original query unchanged. Example: >>> translated = DuckDuckGoSearch.translate_query("hola mundo", to_language="en") >>> print(translated) 'hello world' """ with _get_ddgs()() as ddgs: try: result = ddgs.translate(query, to=to_language) return result.get("translated", query) except Exception as e: import logging logging.getLogger(__name__).debug(f"Failed to translate '{query}' to {to_language}: {e}") return query
__all__ = ("DuckDuckGoSearch",)