IndexError: list index out of range
import asyncio

from crawlee.playwright_crawler import (
    PlaywrightCrawler,
    PlaywrightCrawlingContext,
)


async def main() -> None:
    """Crawl https://crawlee.dev with Playwright and export the results to CSV.

    Each visited page contributes one record with its URL and title to the
    crawler's default dataset; links found on the page are enqueued for
    further crawling. After the crawl finishes, the dataset is written to
    ``results.csv``.
    """
    crawler = PlaywrightCrawler(
        # Limit the crawl to max requests. Remove or increase it for crawling all links.
        max_requests_per_crawl=10,
    )

    # Define the default request handler, which will be called for every request.
    @crawler.router.default_handler
    async def request_handler(context: PlaywrightCrawlingContext) -> None:
        """Record the URL and title of the current page and enqueue its links."""
        context.log.info(f"Processing {context.request.url} ...")

        # Extract data from the page.
        # `context.page` is a Playwright `Page`: `title` is an async *method*,
        # not a BeautifulSoup-style tag, so it has to be called and awaited.
        # Accessing `context.page.title.string` raises AttributeError, which
        # makes the handler fail before any data is pushed; the dataset then
        # stays empty and the CSV export crashes with
        # "IndexError: list index out of range".
        data = {
            "url": context.request.url,
            # Playwright returns "" for a page without a title; keep the
            # original intent of storing None in that case.
            "title": await context.page.title() or None,
        }

        # Enqueue all links found on the page.
        await context.enqueue_links()

        # Push the extracted data to the default dataset.
        await context.push_data(data)

    # Run the crawler with the initial list of URLs.
    await crawler.run(["https://crawlee.dev"])

    # Export the entire dataset to a CSV file.
    await crawler.export_data("results.csv")


if __name__ == "__main__":
    asyncio.run(main())
TERMINAL:
Traceback (most recent call last):
File "g:\python\python_experiment\my-crawler\my-crawler\routes.py", line 40, in <module>
asyncio.run(main())
File "D:\python3.9\lib\asyncio\runners.py", line 44, in run
return loop.run_until_complete(main)
File "D:\python3.9\lib\asyncio\base_events.py", line 642, in run_until_complete
return future.result()
File "g:\python\python_experiment\my-crawler\my-crawler\routes.py", line 36, in main
await crawler.export_data("results.csv")
File "g:\python\python_experiment\venv\lib\site-packages\crawlee\basic_crawler\basic_crawler.py", line 474, in export_data
return await dataset.write_to(content_type, path.open('w', newline=''))
File "g:\python\python_experiment\venv\lib\site-packages\crawlee\storages\dataset.py", line 213, in write_to
writer.writerows([items[0].keys(), *[item.values() for item in items]])
IndexError: list index out of range