# Source code for calute.operators.browser

# Copyright 2025 The EasyDeL/Calute Author @erfanzar (Erfan Zare Chavoshi).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Playwright-backed browser state manager for operator tooling.

Provides :class:`BrowserManager`, which lazily initialises a Chromium
browser via Playwright and manages a pool of tracked pages.  Each page
is represented by a lightweight :class:`BrowserPageState` dataclass that
records the page reference ID, current URL, title, and extracted links.
"""

from __future__ import annotations

import re
import tempfile
import typing as tp
import uuid
from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class BrowserPageState:
    """Tracked state for an opened browser page.

    Attributes:
        ref_id: Unique reference identifier used to address this page
            across operator tool calls.
        url: The last-known URL loaded by this page.
        title: The page title extracted after navigation.
        link_map: Mapping of numeric link IDs to their ``href`` values,
            populated after each page load or refresh.
    """

    ref_id: str
    url: str
    title: str = ""
    # default_factory gives every instance its own dict instead of a shared one.
    link_map: dict[int, str] = field(default_factory=dict)


class BrowserManager:
    """Manage a shared Playwright browser and tracked pages.

    The manager lazily starts a Chromium browser on the first call that
    needs to create a page.  All pages opened through the manager are
    tracked by a generated ``ref_id`` so that subsequent operator tool
    calls (click, find, screenshot) can address them without re-opening.
    Call :meth:`close` to release the browser and the Playwright driver.

    Attributes:
        _headless: Whether the browser runs in headless mode.
        _screenshot_dir: Optional directory for screenshot output.
        _playwright: Playwright instance, created lazily.
        _browser: Chromium browser instance, created lazily.
        _context: Default browser context.
        _pages: Mapping of ``ref_id`` to live Playwright page objects.
        _page_state: Mapping of ``ref_id`` to :class:`BrowserPageState`.
    """

    # Maximum number of body-text characters returned as ``content_preview``.
    _PREVIEW_CHARS = 2000
    # Maximum number of matched strings returned by ``find``.
    _MAX_MATCHES = 20

    def __init__(self, *, headless: bool = True, screenshot_dir: str | None = None) -> None:
        """Initialise the browser manager.

        No browser is started here; that happens lazily when the first
        page is created.

        Args:
            headless: If ``True``, the Chromium browser is launched
                without a visible window.  Defaults to ``True``.
            screenshot_dir: Optional directory path where screenshots
                are saved.  When ``None``, a temporary directory is
                created per screenshot call.
        """
        self._headless = headless
        self._screenshot_dir = screenshot_dir
        self._playwright = None
        self._browser = None
        self._context = None
        self._pages: dict[str, tp.Any] = {}
        self._page_state: dict[str, BrowserPageState] = {}

    async def open(self, *, url: str | None = None, ref_id: str | None = None, wait_ms: int = 500) -> dict[str, tp.Any]:
        """Open a URL or inspect an existing tracked page.

        Either ``url`` or ``ref_id`` must be provided.  When ``url`` is
        given, a new page is created (or, if ``ref_id`` is also given,
        the existing page is navigated) and its metadata is returned.
        When only ``ref_id`` is given, the currently loaded page is
        re-inspected without navigating.

        Args:
            url: URL to navigate to.  A new tracked page is created when
                no ``ref_id`` is supplied alongside the URL.
            ref_id: Reference identifier of a previously opened page.
            wait_ms: Milliseconds to wait after navigation before
                extracting page metadata.  Defaults to ``500``.

        Returns:
            A dictionary containing the page ``ref_id``, current URL,
            title, a truncated content preview (first 2000 characters),
            and a list of extracted links with numeric IDs.

        Raises:
            ValueError: If neither ``url`` nor ``ref_id`` is provided,
                or if the given ``ref_id`` does not match any tracked
                page.
        """
        page, state = await self._resolve_page(url=url, ref_id=ref_id)
        if url is not None:
            try:
                await page.goto(url, wait_until="domcontentloaded")
            except Exception:
                # A page created for this call would otherwise stay open and
                # tracked although the caller never received its ref_id.
                if ref_id is None:
                    await self._discard_page(state.ref_id)
                raise
            await page.wait_for_timeout(wait_ms)
        # Always resync: a click or redirect may have changed the URL even
        # when this call itself did not navigate.
        state.url = page.url
        state.title = await page.title()
        state.link_map = await self._extract_link_map(page)
        content = await page.locator("body").inner_text()
        return {
            "ref_id": state.ref_id,
            "url": page.url,
            "title": state.title,
            "content_preview": content[: self._PREVIEW_CHARS],
            "links": [{"id": idx, "url": href} for idx, href in sorted(state.link_map.items())],
        }

    async def click(
        self,
        ref_id: str,
        *,
        link_id: int | None = None,
        selector: str | None = None,
        text: str | None = None,
        wait_ms: int = 500,
    ) -> dict[str, tp.Any]:
        """Click an element on a tracked page.

        One of ``link_id``, ``selector``, or ``text`` must be provided.
        If several are given, only the first in that order is used.

        Args:
            ref_id: Reference identifier of the tracked page.
            link_id: Numeric link identifier from the page's
                :attr:`BrowserPageState.link_map`.  When provided, the
                browser navigates to the corresponding ``href``.
            selector: CSS selector; the first matching element is
                clicked.
            text: Visible text used to locate the element via
                Playwright's ``get_by_text``; the first match is
                clicked.
            wait_ms: Milliseconds to wait after the click before
                refreshing the page metadata.  Defaults to ``500``.

        Returns:
            The refreshed page metadata dictionary (same shape as
            :meth:`open`).

        Raises:
            ValueError: If the ``ref_id`` is unknown, the ``link_id``
                is not found, or none of the three target parameters
                is provided (empty strings count as not provided).
        """
        page = self._require_page(ref_id)
        state = self._page_state[ref_id]
        if link_id is not None:
            href = state.link_map.get(link_id)
            if href is None:
                raise ValueError(f"Link id {link_id} not found for page {ref_id}")
            await page.goto(href, wait_until="domcontentloaded")
        elif selector:
            await page.locator(selector).first.click()
        elif text:
            await page.get_by_text(text).first.click()
        else:
            raise ValueError("click requires link_id, selector, or text")
        await page.wait_for_timeout(wait_ms)
        return await self.open(ref_id=ref_id)

    async def find(self, ref_id: str, pattern: str) -> dict[str, tp.Any]:
        """Find text matches on a tracked page.

        Performs a case-insensitive regular expression search across the
        body text of the referenced page.

        Args:
            ref_id: Reference identifier of the tracked page to search.
            pattern: Regular expression pattern to match against the
                page's body text.

        Returns:
            A dictionary with the ``ref_id``, the ``pattern`` used, the
            total ``match_count``, and up to 20 matched strings.  Each
            entry is the full matched text, even when the pattern
            contains capture groups.

        Raises:
            ValueError: If the ``ref_id`` does not correspond to a
                tracked page.
            re.error: If ``pattern`` is not a valid regular expression.
        """
        page = self._require_page(ref_id)
        # Compile first so an invalid pattern fails before touching the page.
        regex = re.compile(pattern, re.IGNORECASE)
        body_text = await page.locator("body").inner_text()
        # finditer + group(0) always yields the whole match; findall would
        # return groups (or tuples of groups) for patterns with parentheses.
        matches = [match.group(0) for match in regex.finditer(body_text)]
        return {
            "ref_id": ref_id,
            "pattern": pattern,
            "match_count": len(matches),
            "matches": matches[: self._MAX_MATCHES],
        }

    async def screenshot(self, ref_id: str, *, path: str | None = None, full_page: bool = True) -> dict[str, tp.Any]:
        """Capture a screenshot of a tracked page.

        Args:
            ref_id: Reference identifier of the tracked page to
                capture.
            path: Optional file path for the screenshot.  If omitted, a
                default path inside the configured screenshot directory
                (or a temporary directory) is used.
            full_page: When ``True``, capture the entire scrollable page
                instead of just the visible viewport.  Defaults to
                ``True``.

        Returns:
            A dictionary containing the ``ref_id``, saved file ``path``,
            and the ``full_page`` flag.

        Raises:
            ValueError: If the ``ref_id`` is not tracked.
        """
        page = self._require_page(ref_id)
        screenshot_path = path or self._default_screenshot_path(ref_id)
        await page.screenshot(path=screenshot_path, full_page=full_page)
        return {"ref_id": ref_id, "path": screenshot_path, "full_page": full_page}

    def list_pages(self) -> list[dict[str, str]]:
        """Return summaries for tracked pages.

        Returns:
            A list of dictionaries, each containing the ``ref_id``,
            ``url``, and ``title`` of a tracked page, sorted by
            ``ref_id``.
        """
        return [
            {"ref_id": ref_id, "url": state.url, "title": state.title}
            for ref_id, state in sorted(self._page_state.items())
        ]

    async def close(self) -> None:
        """Release the browser, its context, and the Playwright driver.

        All tracked pages are forgotten.  Calling this on a manager that
        never started a browser, or calling it twice, is a no-op.  The
        manager can be used again afterwards; a new browser is started
        lazily on the next page creation.
        """
        context, browser, playwright = self._context, self._browser, self._playwright
        # Reset first so the manager is reusable even if a shutdown step raises.
        self._context = self._browser = self._playwright = None
        self._pages.clear()
        self._page_state.clear()
        try:
            if context is not None:
                await context.close()
        finally:
            try:
                if browser is not None:
                    await browser.close()
            finally:
                if playwright is not None:
                    await playwright.stop()

    async def _resolve_page(self, *, url: str | None, ref_id: str | None) -> tuple[tp.Any, BrowserPageState]:
        """Resolve or create a Playwright page and its tracking state.

        If ``ref_id`` is given, the existing page and state are returned.
        If only ``url`` is given, a new page is created in the shared
        browser context (starting the browser if necessary).  Arguments
        are validated before the browser is started.

        Args:
            url: URL for which a new page should be created when no
                ``ref_id`` is provided.
            ref_id: Reference identifier of an existing tracked page.

        Returns:
            A tuple of ``(page, state)`` where *page* is the Playwright
            page object and *state* is the :class:`BrowserPageState`.

        Raises:
            ValueError: If neither ``url`` nor ``ref_id`` is provided,
                or the ``ref_id`` is unknown.
        """
        if ref_id is not None:
            return self._require_page(ref_id), self._page_state[ref_id]
        if url is None:
            raise ValueError("open requires url or ref_id")
        await self._ensure_browser()
        page = await self._context.new_page()
        ref_id = f"page_{uuid.uuid4().hex[:10]}"
        state = BrowserPageState(ref_id=ref_id, url=url)
        self._pages[ref_id] = page
        self._page_state[ref_id] = state
        return page, state

    def _require_page(self, ref_id: str) -> tp.Any:
        """Return a tracked Playwright page or raise.

        Args:
            ref_id: Reference identifier of the page to retrieve.

        Returns:
            The live Playwright page object.

        Raises:
            ValueError: If no page with the given ``ref_id`` exists.
        """
        if ref_id not in self._pages:
            raise ValueError(f"Browser page not found: {ref_id}")
        return self._pages[ref_id]

    async def _discard_page(self, ref_id: str) -> None:
        """Stop tracking a page and close it on a best-effort basis.

        Args:
            ref_id: Reference identifier of the page to drop.  Unknown
                identifiers are ignored.
        """
        page = self._pages.pop(ref_id, None)
        self._page_state.pop(ref_id, None)
        if page is None:
            return
        try:
            await page.close()
        except Exception:
            # This runs while another error is propagating; a failure to
            # close must not replace the error the caller needs to see.
            pass

    async def _ensure_browser(self) -> None:
        """Lazily start the Playwright Chromium browser.

        Called before a new page is created.  If the browser and its
        context already exist this method is a no-op.  Start-up is
        staged, so a retry after a partial failure reuses whatever was
        already created instead of starting a second driver.

        Raises:
            RuntimeError: If the ``playwright`` package is not
                installed.
        """
        if self._browser is not None and self._context is not None:
            return
        if self._browser is None:
            if self._playwright is None:
                try:
                    from playwright.async_api import async_playwright
                except ImportError as exc:  # pragma: no cover - environment dependent
                    raise RuntimeError("Playwright is required for browser operator tools") from exc
                self._playwright = await async_playwright().start()
            self._browser = await self._playwright.chromium.launch(headless=self._headless)
        if self._context is None:
            self._context = await self._browser.new_context()

    async def _extract_link_map(self, page: tp.Any) -> dict[int, str]:
        """Extract an ordered mapping of link hrefs from the page.

        Args:
            page: Playwright page object to inspect.

        Returns:
            A dictionary mapping sequential integer IDs, starting at 0,
            to the non-empty ``href`` values of the ``<a href>``
            elements found on the page, in document order.
        """
        links = await page.locator("a[href]").evaluate_all("(els) => els.map((el) => el.href).filter(Boolean)")
        return dict(enumerate(links))

    def _default_screenshot_path(self, ref_id: str) -> str:
        """Compute a default screenshot file path.

        Uses the configured :attr:`_screenshot_dir` when set (creating
        it if missing), otherwise creates a fresh temporary directory.

        Args:
            ref_id: Page reference identifier used to construct the
                filename.

        Returns:
            Path string ``<directory>/<ref_id>.png``.  It is relative
            when the configured screenshot directory is relative.
        """
        if self._screenshot_dir:
            directory = Path(self._screenshot_dir)
            directory.mkdir(parents=True, exist_ok=True)
        else:
            directory = Path(tempfile.mkdtemp(prefix="calute-browser-"))
        return str(directory / f"{ref_id}.png")