bpo-31658: Make xml.sax.parse accept Path objects · python/cpython@04498f8

4 files changed

lines changed

Original file line number | Diff line number | Diff line change

@@ -102,13 +102,16 @@ The :class:`XMLReader` interface supports the following methods:

102102
103103

Process an input source, producing SAX events. The *source* object can be a

104104

system identifier (a string identifying the input source -- typically a file

105-

name or a URL), a file-like object, or an :class:`InputSource` object. When

106-

:meth:`parse` returns, the input is completely processed, and the parser object

107-

can be discarded or reset.

105+

name or a URL), a :class:`~pathlib.Path` object, a file-like object or an

106+

:class:`InputSource` object. When :meth:`parse` returns, the input is

107+

completely processed, and the parser object can be discarded or reset.

108108
109109

.. versionchanged:: 3.5

110110

Added support for character streams.

111111
112+

.. versionchanged:: 3.8

113+

Added support for :class:`~pathlib.Path` objects.

114+
112115
113116

.. method:: XMLReader.getContentHandler()

114117
Original file line number | Diff line number | Diff line change

@@ -18,6 +18,7 @@

1818

from io import BytesIO, StringIO

1919

import codecs

2020

import os.path

21+

import pathlib

2122

import shutil

2223

from urllib.error import URLError

2324

from test import support

@@ -182,6 +183,10 @@ def test_parse_bytes(self):

182183

with self.assertRaises(SAXException):

183184

self.check_parse(f)

184185
186+

def test_parse_path_object(self):

187+

make_xml_file(self.data, 'utf-8', None)

188+

self.check_parse(pathlib.Path(TESTFN))

189+
185190

def test_parse_InputSource(self):

186191

# accept data without declared but with explicitly specified encoding

187192

make_xml_file(self.data, 'iso-8859-1', None)

@@ -397,6 +402,13 @@ def test_string(self):

397402

self.checkContent(prep.getByteStream(),

398403

b"This was read from a file.")

399404
405+

def test_path_objects(self):

406+

# If the source is a Path object, use it as a system ID and open it.

407+

prep = prepare_input_source(pathlib.Path(self.file))

408+

self.assertIsNone(prep.getCharacterStream())

409+

self.checkContent(prep.getByteStream(),

410+

b"This was read from a file.")

411+
400412

def test_binary_file(self):

401413

# If the source is a binary file-like object, use it as a byte

402414

# stream.

Original file line number | Diff line number | Diff line change

@@ -340,6 +340,8 @@ def prepare_input_source(source, base=""):

340340

"""This function takes an InputSource and an optional base URL and

341341

returns a fully resolved InputSource object ready for reading."""

342342
343+

if isinstance(source, os.PathLike):

344+

source = os.fspath(source)

343345

if isinstance(source, str):

344346

source = xmlreader.InputSource(source)

345347

elif hasattr(source, "read"):

Original file line number | Diff line number | Diff line change

@@ -0,0 +1 @@

1+

Make xml.sax.parse accept Path objects. Patch by Mickaël Schoentgen.