"""Crawlee + CloakBrowser: stealth web crawling with PlaywrightCrawler.

Uses a custom BrowserPlugin to swap Crawlee's default Chromium
for CloakBrowser's patched binary with source-level fingerprint patches.

Requires: pip install cloakbrowser "crawlee[playwright]"
"""

import asyncio

from cloakbrowser.config import IGNORE_DEFAULT_ARGS, get_default_stealth_args
from cloakbrowser.download import ensure_binary
from typing_extensions import override

from crawlee.browsers import (
    BrowserPool,
    PlaywrightBrowserController,
    PlaywrightBrowserPlugin,
)
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext


class CloakBrowserPlugin(PlaywrightBrowserPlugin):
    """Browser plugin that uses CloakBrowser's patched Chromium,
    but otherwise keeps the functionality of PlaywrightBrowserPlugin.

    Only ``new_browser`` is overridden; everything else is inherited.
    """

    @override
    async def new_browser(self) -> PlaywrightBrowserController:
        """Launch the CloakBrowser Chromium binary and wrap it in a controller.

        The plugin's stored launch options are copied (never mutated) and
        adjusted before launch:

        * ``executable_path`` is replaced by the path from ``ensure_binary()``.
        * ``chromium_sandbox`` is removed from the options.
        * CloakBrowser's stealth args are appended after any existing ``args``.
        * ``ignore_default_args`` is combined with CloakBrowser's
          ``IGNORE_DEFAULT_ARGS`` instead of being passed twice.

        Returns:
            A ``PlaywrightBrowserController`` limited to one open page per
            browser and created with ``header_generator=None``.

        Raises:
            RuntimeError: If the plugin's Playwright instance is not set.
        """
        if not self._playwright:
            raise RuntimeError('Playwright browser plugin is not initialized.')

        # NOTE(review): this call is synchronous; if it has to fetch the
        # binary it will block the event loop until it returns -- confirm.
        binary_path = ensure_binary()
        stealth_args = get_default_stealth_args()

        # Merge CloakBrowser stealth args with any user-provided launch options.
        launch_options = dict(self._browser_launch_options)
        launch_options.pop('executable_path', None)
        launch_options.pop('chromium_sandbox', None)

        # ``args`` may be absent or explicitly ``None``; both mean "no extra args".
        existing_args = list(launch_options.pop('args', None) or ())
        launch_options['args'] = [*existing_args, *stealth_args]

        # Pull out a user-supplied ``ignore_default_args`` so it cannot collide
        # with the explicit keyword below (a duplicate keyword is a TypeError).
        user_ignored = launch_options.pop('ignore_default_args', None)
        if user_ignored is True:
            # The user asked to ignore every default arg; that already covers
            # everything CloakBrowser wants ignored.
            ignore_default_args = True
        elif isinstance(user_ignored, (list, tuple)) and isinstance(
            IGNORE_DEFAULT_ARGS, (list, tuple)
        ):
            # Union of both lists, order-preserving and without duplicates.
            ignore_default_args = list(
                dict.fromkeys([*IGNORE_DEFAULT_ARGS, *user_ignored])
            )
        else:
            ignore_default_args = IGNORE_DEFAULT_ARGS

        return PlaywrightBrowserController(
            browser=await self._playwright.chromium.launch(
                executable_path=binary_path,
                ignore_default_args=ignore_default_args,
                **launch_options,
            ),
            max_open_pages_per_browser=1,
            # CloakBrowser handles fingerprints at the binary level.
            header_generator=None,
        )


async def main() -> None:
    """Run a small crawl of example.com through the CloakBrowser plugin.

    Builds a ``PlaywrightCrawler`` (capped at 10 requests) whose browser pool
    contains only ``CloakBrowserPlugin``, registers the default request
    handler, and starts the crawl.
    """
    stealth_pool = BrowserPool(plugins=[CloakBrowserPlugin()])
    crawler = PlaywrightCrawler(
        max_requests_per_crawl=10,
        browser_pool=stealth_pool,
    )

    async def request_handler(context: PlaywrightCrawlingContext) -> None:
        """Log the URL, store its page title, then enqueue the page's links."""
        context.log.info(f'Processing {context.request.url} ...')
        page_title = await context.page.title()
        record = {'url': context.request.url, 'title': page_title}
        await context.push_data(record)
        await context.enqueue_links()

    # Explicit registration; equivalent to decorating the handler.
    crawler.router.default_handler(request_handler)

    start_urls = ['https://example.com']
    await crawler.run(start_urls)


# Script entry point: run the crawl on a fresh asyncio event loop only when
# this file is executed directly, not when it is imported.
if __name__ == '__main__':
    asyncio.run(main())