# Source code for calute.operators.browser

# Copyright 2025 The EasyDeL/Calute Author @erfanzar (Erfan Zare Chavoshi).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Playwright-backed browser state manager for operator tooling.

Provides :class:`BrowserManager`, which lazily initialises a Chromium
browser via Playwright and manages a pool of tracked pages.  Each page
is represented by a lightweight :class:`BrowserPageState` dataclass that
records the page reference ID, current URL, title, and extracted links.
"""

from __future__ import annotations

import re
import tempfile
import typing as tp
import uuid
from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class BrowserPageState:
    """Tracked state for an opened browser page.

    Attributes:
        ref_id: Unique reference identifier used to address this page
            across operator tool calls.
        url: The last-known URL loaded by this page.
        title: The page title extracted after navigation. Defaults to an
            empty string until the page has been inspected.
        link_map: Mapping of numeric link IDs to their ``href`` values,
            populated after each page load or refresh. Every instance
            gets its own fresh dictionary.
    """

    ref_id: str
    url: str
    title: str = ""
    # default_factory avoids sharing one dict between all instances.
    link_map: dict[int, str] = field(default_factory=dict)
class BrowserManager:
    """Manage a shared Playwright browser and tracked pages.

    The manager lazily starts a Chromium browser on the first call that
    needs to create a page. All pages opened through the manager are
    tracked by a generated ``ref_id`` so that subsequent operator tool
    calls (click, find, screenshot) can address them without re-opening.
    Call :meth:`close` to release the browser when it is no longer needed.

    Attributes:
        _headless: Whether the browser runs in headless mode.
        _screenshot_dir: Optional directory for screenshot output.
        _playwright: Playwright instance, created lazily.
        _browser: Chromium browser instance, created lazily.
        _context: Default browser context, created lazily.
        _pages: Mapping of ``ref_id`` to live Playwright page objects.
        _page_state: Mapping of ``ref_id`` to :class:`BrowserPageState`.
    """

    def __init__(self, *, headless: bool = True, screenshot_dir: str | None = None) -> None:
        """Initialise the browser manager without starting a browser.

        Args:
            headless: If ``True``, the Chromium browser is launched
                without a visible window. Defaults to ``True``.
            screenshot_dir: Optional directory path where screenshots are
                saved. When ``None``, a temporary directory is created
                per screenshot call.
        """
        self._headless = headless
        self._screenshot_dir = screenshot_dir
        self._playwright = None
        self._browser = None
        self._context = None
        self._pages: dict[str, tp.Any] = {}
        self._page_state: dict[str, BrowserPageState] = {}

    async def open(
        self,
        *,
        url: str | None = None,
        ref_id: str | None = None,
        wait_ms: int = 500,
    ) -> dict[str, tp.Any]:
        """Open a URL or inspect an existing tracked page.

        Either ``url`` or ``ref_id`` must be provided. With only ``url``,
        a new tracked page is created and navigated. With both, the
        existing page identified by ``ref_id`` is navigated to ``url``.
        With only ``ref_id``, the currently loaded page is re-inspected
        without navigating or waiting.

        Args:
            url: URL to navigate to.
            ref_id: Reference identifier of a previously opened page.
            wait_ms: Milliseconds to wait after navigation before
                extracting page metadata. Ignored when ``url`` is
                ``None``. Defaults to ``500``.

        Returns:
            A dictionary containing the page ``ref_id``, current ``url``,
            ``title``, a ``content_preview`` (first 2000 characters of the
            body text), and ``links``: a list of ``{"id", "url"}``
            entries ordered by numeric ID.

        Raises:
            ValueError: If neither ``url`` nor ``ref_id`` is provided, or
                if the given ``ref_id`` does not match any tracked page.
            RuntimeError: If a browser must be started and the
                ``playwright`` package is not installed.
        """
        page, state = await self._resolve_page(url=url, ref_id=ref_id)
        if url is not None:
            await page.goto(url, wait_until="domcontentloaded")
            await page.wait_for_timeout(wait_ms)

        # Refresh the tracked state from the live page.
        state.url = page.url
        state.title = await page.title()
        state.link_map = await self._extract_link_map(page)
        content = await page.locator("body").inner_text()
        return {
            "ref_id": state.ref_id,
            "url": page.url,
            "title": state.title,
            "content_preview": content[:2000],
            "links": [{"id": idx, "url": href} for idx, href in sorted(state.link_map.items())],
        }

    async def click(
        self,
        ref_id: str,
        *,
        link_id: int | None = None,
        selector: str | None = None,
        text: str | None = None,
        wait_ms: int = 500,
    ) -> dict[str, tp.Any]:
        """Click an element on a tracked page.

        At least one of ``link_id``, ``selector``, or ``text`` must be
        provided. If several are given, the first applicable one wins in
        the order ``link_id``, ``selector``, ``text``; empty strings are
        treated as not provided.

        Args:
            ref_id: Reference identifier of the tracked page.
            link_id: Numeric link identifier from the page's
                :attr:`BrowserPageState.link_map`. When provided, the
                page navigates directly to the corresponding ``href``.
            selector: CSS selector; the first matching element is clicked.
            text: Visible text used to locate the element via Playwright's
                ``get_by_text``; the first match is clicked.
            wait_ms: Milliseconds to wait after the action before
                refreshing the page metadata. Defaults to ``500``.

        Returns:
            The refreshed page metadata dictionary (same shape as
            :meth:`open`).

        Raises:
            ValueError: If the ``ref_id`` is unknown, the ``link_id`` is
                not found, or no target parameter is provided.
        """
        page = self._require_page(ref_id)
        state = self._page_state[ref_id]

        if link_id is not None:
            href = state.link_map.get(link_id)
            if href is None:
                raise ValueError(f"Link id {link_id} not found for page {ref_id}")
            await page.goto(href, wait_until="domcontentloaded")
        elif selector:
            await page.locator(selector).first.click()
        elif text:
            await page.get_by_text(text).first.click()
        else:
            raise ValueError("click requires link_id, selector, or text")

        await page.wait_for_timeout(wait_ms)
        return await self.open(ref_id=ref_id)

    async def find(self, ref_id: str, pattern: str) -> dict[str, tp.Any]:
        """Find text matches on a tracked page.

        Performs a case-insensitive regular expression search across the
        body text of the referenced page.

        Args:
            ref_id: Reference identifier of the tracked page to search.
            pattern: Regular expression pattern to match against the
                page's body text.

        Returns:
            A dictionary with the ``ref_id``, the ``pattern`` used, the
            total ``match_count``, and ``matches``: up to 20 matched
            substrings. Each entry is the full text of a match, even when
            the pattern contains capture groups.

        Raises:
            ValueError: If the ``ref_id`` does not correspond to a
                tracked page.
            re.error: If ``pattern`` is not a valid regular expression.
        """
        page = self._require_page(ref_id)
        # Compile before touching the page so a bad pattern fails fast.
        regex = re.compile(pattern, re.IGNORECASE)
        body_text = await page.locator("body").inner_text()
        # finditer + group(0) always yields whole-match strings; findall
        # would return group contents (or tuples) for grouped patterns.
        matches = [match.group(0) for match in regex.finditer(body_text)]
        return {
            "ref_id": ref_id,
            "pattern": pattern,
            "match_count": len(matches),
            "matches": matches[:20],
        }

    async def screenshot(
        self,
        ref_id: str,
        *,
        path: str | None = None,
        full_page: bool = True,
    ) -> dict[str, tp.Any]:
        """Capture a screenshot of a tracked page.

        Args:
            ref_id: Reference identifier of the tracked page to capture.
            path: Optional file path for the screenshot. If omitted (or
                empty), a default path inside the configured screenshot
                directory (or a new temporary directory) is used.
            full_page: When ``True``, capture the entire scrollable page
                instead of just the visible viewport. Defaults to ``True``.

        Returns:
            A dictionary containing the ``ref_id``, saved file ``path``,
            and the ``full_page`` flag.

        Raises:
            ValueError: If the ``ref_id`` is not tracked.
        """
        page = self._require_page(ref_id)
        screenshot_path = path or self._default_screenshot_path(ref_id)
        await page.screenshot(path=screenshot_path, full_page=full_page)
        return {"ref_id": ref_id, "path": screenshot_path, "full_page": full_page}

    def list_pages(self) -> list[dict[str, str]]:
        """Return summaries for tracked pages.

        Returns:
            A list of dictionaries, each containing the ``ref_id``,
            ``url``, and ``title`` of a tracked page, sorted by ``ref_id``.
        """
        return [
            {"ref_id": ref_id, "url": state.url, "title": state.title}
            for ref_id, state in sorted(self._page_state.items())
        ]

    async def close(self) -> None:
        """Release the browser, its context, and the Playwright driver.

        All tracked pages are forgotten, so previously issued ``ref_id``
        values become invalid. The manager can be used again afterwards;
        a new browser is started lazily on the next call that needs one.
        Calling this method when nothing is running is a no-op.
        """
        self._pages.clear()
        self._page_state.clear()
        # Detach the handles first so the manager is reusable even if a
        # shutdown call below raises.
        context, browser, playwright = self._context, self._browser, self._playwright
        self._context = None
        self._browser = None
        self._playwright = None
        try:
            if context is not None:
                await context.close()
            if browser is not None:
                await browser.close()
        finally:
            # Always stop the driver, even if closing the browser failed.
            if playwright is not None:
                await playwright.stop()

    async def _resolve_page(self, *, url: str | None, ref_id: str | None) -> tuple[tp.Any, BrowserPageState]:
        """Resolve or create a Playwright page and its tracking state.

        If ``ref_id`` is given, the existing page and state are returned.
        If only ``url`` is given, a new page is created in the shared
        browser context and registered under a freshly generated
        ``ref_id``. Arguments are validated before any browser is
        started, so invalid calls never launch Chromium.

        Args:
            url: URL for which a new page should be created when no
                ``ref_id`` is provided.
            ref_id: Reference identifier of an existing tracked page.

        Returns:
            A tuple of ``(page, state)`` where *page* is the Playwright
            page object and *state* is the :class:`BrowserPageState`.

        Raises:
            ValueError: If neither ``url`` nor ``ref_id`` is provided, or
                the ``ref_id`` is unknown.
            RuntimeError: If a browser must be started and the
                ``playwright`` package is not installed.
        """
        if ref_id is not None:
            # A tracked page implies a running browser; nothing to start.
            return self._require_page(ref_id), self._page_state[ref_id]
        if url is None:
            raise ValueError("open requires url or ref_id")

        await self._ensure_browser()
        page = await self._context.new_page()
        ref_id = f"page_{uuid.uuid4().hex[:10]}"
        state = BrowserPageState(ref_id=ref_id, url=url)
        self._pages[ref_id] = page
        self._page_state[ref_id] = state
        return page, state

    def _require_page(self, ref_id: str) -> tp.Any:
        """Return a tracked Playwright page or raise.

        Args:
            ref_id: Reference identifier of the page to retrieve.

        Returns:
            The live Playwright page object.

        Raises:
            ValueError: If no page with the given ``ref_id`` exists.
        """
        if ref_id not in self._pages:
            raise ValueError(f"Browser page not found: {ref_id}")
        return self._pages[ref_id]

    async def _ensure_browser(self) -> None:
        """Lazily start the Playwright Chromium browser and context.

        Called before a new page is created. If a browser context already
        exists this method is a no-op. Each start-up step runs only when
        its result is still missing, so a call that failed part-way can
        be retried without starting a second Playwright driver or browser.

        Raises:
            RuntimeError: If the ``playwright`` package is not installed.
        """
        if self._context is not None:
            return
        try:
            from playwright.async_api import async_playwright
        except ImportError as exc:  # pragma: no cover - environment dependent
            raise RuntimeError("Playwright is required for browser operator tools") from exc

        if self._playwright is None:
            self._playwright = await async_playwright().start()
        if self._browser is None:
            self._browser = await self._playwright.chromium.launch(headless=self._headless)
        self._context = await self._browser.new_context()

    async def _extract_link_map(self, page: tp.Any) -> dict[int, str]:
        """Extract an ordered mapping of link hrefs from the page.

        Args:
            page: Playwright page object to inspect.

        Returns:
            A dictionary mapping sequential integer IDs (starting at 0)
            to the non-empty ``href`` values of the ``a[href]`` elements
            found on the page.
        """
        links = await page.locator("a[href]").evaluate_all("(els) => els.map((el) => el.href).filter(Boolean)")
        return dict(enumerate(links))

    def _default_screenshot_path(self, ref_id: str) -> str:
        """Compute a default screenshot file path.

        Uses the configured :attr:`_screenshot_dir` when set (creating it
        if necessary), otherwise creates a new temporary directory.

        Args:
            ref_id: Page reference identifier used to construct the
                filename.

        Returns:
            Path string of the form ``<directory>/<ref_id>.png``. The
            path is relative if the configured screenshot directory is.
        """
        if self._screenshot_dir:
            directory = Path(self._screenshot_dir)
            directory.mkdir(parents=True, exist_ok=True)
        else:
            directory = Path(tempfile.mkdtemp(prefix="calute-browser-"))
        return str(directory / f"{ref_id}.png")