fix: Deduplicate requests by unique key before submitting them to the queue by janbuchar · Pull Request #499 · apify/crawlee-python
Expand Up
@@ -90,10 +90,10 @@ def _transform_request(self, request: str | BaseRequestData | Request) -> Reques
def _transform_requests(self, requests: Sequence[str | BaseRequestData | Request]) -> list[Request]:
    """Transform request-like objects into `Request` objects, deduplicated by unique key.

    Each item is converted with `self._transform_request`. When several items
    produce the same `unique_key`, only the first one is kept, and the kept
    requests appear in the order in which their keys were first seen.

    Args:
        requests: The request-like objects to convert.

    Returns:
        The converted requests, at most one per `unique_key`.
    """
    # Keyed by unique_key; dicts preserve insertion order, so the result follows input order.
    unique_requests: dict[str, Request] = {}

    for request in requests:
        processed_request = self._transform_request(request)
        # setdefault leaves an existing entry untouched, so the first occurrence wins.
        unique_requests.setdefault(processed_request.unique_key, processed_request)

    return list(unique_requests.values())
def _transform_requests(self, requests: Sequence[str | BaseRequestData | Request]) -> list[Request]:
    """Transform request-like objects into `Request` objects, deduplicated by unique key.

    Each item is converted with `self._transform_request`. When several items
    produce the same `unique_key`, only the first one is kept, and the kept
    requests appear in the order in which their keys were first seen.

    Args:
        requests: The request-like objects to convert.

    Returns:
        The converted requests, at most one per `unique_key`.
    """
    # Keyed by unique_key; dicts preserve insertion order, so the result follows input order.
    unique_requests: dict[str, Request] = {}

    for request in requests:
        processed_request = self._transform_request(request)
        # setdefault leaves an existing entry untouched, so the first occurrence wins.
        unique_requests.setdefault(processed_request.unique_key, processed_request)

    return list(unique_requests.values())