Correct / recommended way of using `user_data`
After the merge of this PR, I receive type errors when working with `user_data`. Consider this sample:
import asyncio

from crawlee import Request
from crawlee._utils.urls import convert_to_absolute_url, is_url_absolute
from crawlee.configuration import Configuration
from crawlee.parsel_crawler import ParselCrawler, ParselCrawlingContext
from crawlee.router import Router

router = Router[ParselCrawlingContext]()

# Default handler: for each link matched by the XPath, enqueue a "detail" request carrying the link title.
@router.default_handler
async def default_handler(context: ParselCrawlingContext) -> None:
    for category in context.selector.xpath(
        '//div[@class="side_categories"]//ul/li/ul/li/a'
    ):
        item = {"title": category.xpath("normalize-space()").get()}
        url = category.xpath("./@href").get()
        if url is not None:  # links without an href are skipped
            if not is_url_absolute(url):  # presumably resolves a relative href against the current page URL
                url = str(convert_to_absolute_url(context.request.url, url))
            request = Request.from_url(url, method="GET", label="detail")
            request.user_data["item"] = item  # <--- TYPE ERROR (mypy: [assignment], dict[str, str | None] vs JsonValue)
            await context.add_requests([request])

# "detail" handler: read the item stored in user_data, add a "results" field, and push it as data.
@router.handler("detail")
async def detail_handler(context: ParselCrawlingContext) -> None:
    item = context.request.user_data["item"]
    item["results"] = context.selector.xpath("normalize-space(//form//strong[1])").get()  # <-- TYPE ERROR (mypy: [index], [call-overload])
    await context.push_data(item)

# Entry point: turn off both storage flags on the global configuration, run the crawler, print the items.
async def main() -> None:
    config = Configuration.get_global_configuration()
    config.persist_storage = False
    config.write_metadata = False

    crawler = ParselCrawler(request_handler=router)
    await crawler.run(["https://books.toscrape.com"])

    data = await crawler.get_data()
    print(data.items)


if __name__ == "__main__":
    asyncio.run(main())
In both VS Code (with Pylance) and on the CLI (mypy), I get type errors at the highlighted spots. Mypy reports this:
./venv/bin/mypy main.py
main.py:23: error: Incompatible types in assignment (expression has type "dict[str, str | None]", target has type "JsonValue") [assignment]
main.py:30: error: Unsupported target for indexed assignment ("list[JsonValue] | dict[str, JsonValue] | str | bool | int | float | None") [index]
main.py:30: error: No overload variant of "__setitem__" of "list" matches argument types "str", "str | None" [call-overload]
main.py:30: note: Possible overload variants:
main.py:30: note: def __setitem__(self, SupportsIndex, JsonValue, /) -> None
main.py:30: note: def __setitem__(self, slice, Iterable[JsonValue], /) -> None
Found 3 errors in 1 file (checked 1 source file)