fix: restore proxy functionality for PlaywrightCrawler broken in v0.5 by Mantisus · Pull Request #889 · apify/crawlee-python

Expand Up @@ -18,11 +18,12 @@ PW_CHROMIUM_HEADLESS_DEFAULT_USER_AGENT, PW_FIREFOX_HEADLESS_DEFAULT_USER_AGENT, ) from crawlee.proxy_configuration import ProxyConfiguration
if TYPE_CHECKING: from yarl import URL
from crawlee.crawlers import PlaywrightCrawlingContext from crawlee.crawlers import PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext

async def test_basic_request(httpbin: URL) -> None: Expand Down Expand Up @@ -188,3 +189,28 @@ async def request_handler(_context: PlaywrightCrawlingContext) -> None: await crawler.run(['https://example.com', str(httpbin)])
assert mock_hook.call_count == 2

async def test_proxy_set() -> None: # Configure crawler with proxy settings proxy_value = 'http://1111:1111' crawler = PlaywrightCrawler(proxy_configuration=ProxyConfiguration(proxy_urls=[proxy_value]))
handler_data = {}
mock_handler = mock.AsyncMock(return_value=None) crawler.router.default_handler(mock_handler)
# Use pre_navigation_hook to verify proxy and configure playwright route @crawler.pre_navigation_hook async def some_hook(context: PlaywrightPreNavCrawlingContext) -> None: if context.proxy_info: # Store information about the used proxy handler_data['proxy'] = context.proxy_info.url
# Emulate server response to prevent Playwright from making real requests await context.page.route('**/*', lambda route: route.fulfill(status=200))
await crawler.run(['https://test.com'])
assert handler_data.get('proxy') == proxy_value