bpo-25643: Fix tokenizer error when raw decoding null bytes by pablogsal · Pull Request #25080 · python/cpython
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d18fffa..d8ccb44 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -963,10 +963,14 @@ struct tok_state *
if (tok->lineno > 2) {
tok->decoding_state = STATE_NORMAL;
}
- else if (!check_coding_spec(tok->cur, tok->end - tok->cur,
- tok, fp_setreadl))
- {
- return 0;
+ else {
+ Py_ssize_t size = tok->end - tok->cur;
+ Py_ssize_t len = strnlen(tok->cur, size);
+ size = Py_MIN(size, len);
+ if (!check_coding_spec(tok->cur, size, tok, fp_setreadl))
+ {
+ return 0;
+ }
}
}
/* The default encoding is UTF-8, so make sure we don't have any