fix: Fix `same-domain` strategy ignoring public suffix by Pijukatel · Pull Request #1572 · apify/crawlee-python
Expand Up
@@ -347,6 +347,7 @@ class AddRequestsTestInput:
     'https://blog.someplace.com/index.html',
     'https://redirect.someplace.com',
     'https://other.place.com/index.html',
+    'https://someplace.jp/',
 )
 INCLUDE_TEST_URLS = (
@@ -401,7 +402,7 @@ class AddRequestsTestInput:
         AddRequestsTestInput(
             start_url=STRATEGY_TEST_URLS[0],
             loaded_url=STRATEGY_TEST_URLS[0],
-            requests=STRATEGY_TEST_URLS[:4],
+            requests=STRATEGY_TEST_URLS,
             kwargs=EnqueueLinksKwargs(strategy='same-domain'),
             expected_urls=STRATEGY_TEST_URLS[1:4],
         ),
@@ -411,7 +412,7 @@ class AddRequestsTestInput:
         AddRequestsTestInput(
             start_url=STRATEGY_TEST_URLS[0],
             loaded_url=STRATEGY_TEST_URLS[0],
-            requests=STRATEGY_TEST_URLS[:4],
+            requests=STRATEGY_TEST_URLS,
             kwargs=EnqueueLinksKwargs(strategy='same-hostname'),
             expected_urls=[STRATEGY_TEST_URLS[1]],
         ),
@@ -421,7 +422,7 @@ class AddRequestsTestInput:
         AddRequestsTestInput(
             start_url=STRATEGY_TEST_URLS[0],
             loaded_url=STRATEGY_TEST_URLS[0],
-            requests=STRATEGY_TEST_URLS[:4],
+            requests=STRATEGY_TEST_URLS,
             kwargs=EnqueueLinksKwargs(strategy='same-origin'),
             expected_urls=[],
         ),