Unable to execute POST request with JSON payload
Example
async def main() -> None:
    """Run an ``HttpCrawler`` against a single POST request and log the response body.

    The request targets ``https://httpbin.org/post``, declares a JSON
    ``content-type`` header, and passes the payload as a dict via ``data``.
    """
    crawler = HttpCrawler()

    # Default request handler: called for every request the crawler processes.
    @crawler.router.default_handler
    async def request_handler(context: HttpCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')
        raw_body = context.http_response.read()
        response = raw_body.decode('utf-8')
        # Log the decoded body so the response is visible in the logs.
        context.log.info(f'Response: {response}')

    # Payload and headers for the POST request to the form endpoint.
    json_headers = {"content-type": "application/json"}
    order_form = {
        'custname': 'John Doe',
        'custtel': '1234567890',
        'custemail': 'johndoe@example.com',
        'size': 'large',
        'topping': ['bacon', 'cheese', 'mushroom'],
        'delivery': '13:00',
        'comments': 'Please ring the doorbell upon arrival.',
    }

    post_request = Request.from_url(
        url='https://httpbin.org/post',
        method='POST',
        headers=json_headers,
        data=order_form,
    )

    await crawler.run([post_request])
Current response format
{
"args": {},
"data": "custname=John+Doe&custtel=1234567890&custemail=johndoe%40example.com&size=large&topping=bacon&topping=cheese&topping=mushroom&delivery=13%3A00&comments=Please+ring+the+doorbell+upon+arrival.",
"files": {},
"form": {},
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
"Content-Length": "190",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"X-Amzn-Trace-Id": "Root=1-66fc90cf-11644d4f5483e1096211b721"
},
"json": null,
"origin": "91.240.96.149",
"url": "https://httpbin.org/post"
}
Expected response format
{
"args": {},
"data": "{\"custname\": \"John Doe\", \"custtel\": \"1234567890\", \"custemail\": \"johndoe@example.com\", \"size\": \"large\", \"topping\": [\"bacon\", \"cheese\", \"mushroom\"], \"delivery\": \"13:00\", \"comments\": \"Please ring the doorbell upon arrival.\"}",
"files": {},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br",
"Content-Length": "221",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "python-httpx/0.27.2",
"X-Amzn-Trace-Id": "Root=1-66fc91c2-6db9989347fef25b150615e2"
},
"json": {
"comments": "Please ring the doorbell upon arrival.",
"custemail": "johndoe@example.com",
"custname": "John Doe",
"custtel": "1234567890",
"delivery": "13:00",
"size": "large",
"topping": [
"bacon",
"cheese",
"mushroom"
]
},
"origin": "91.240.96.149",
"url": "https://httpbin.org/post"
}
Both HTTPX and curl_impersonate allow creating a POST request with a JSON payload in two ways:
# Payload shared by both variants below.
payload = {
    'custname': 'John Doe',
    'custtel': '1234567890',
    'custemail': 'johndoe@example.com',
    'size': 'large',
    'topping': ['bacon', 'cheese', 'mushroom'],
    'delivery': '13:00',
    'comments': 'Please ring the doorbell upon arrival.',
}

# Variant 1: pass the dict through the `json` keyword.
response = httpx.post(url, json=payload)

# Variant 2: serialize the dict to a JSON string and pass it through `data`.
response = httpx.post(url, data=json.dumps(payload))
However, we can't reproduce this behavior in Crawlee, because the `json` parameter is not passed when the request is created, and the `data` parameter cannot be a string.