bpo-37531: Fix regrtest timeout for subprocesses (GH-15072) · python/cpython@b0c8369
@@ -13,7 +13,7 @@
13131414from test.libregrtest.runtest import (
1515runtest, INTERRUPTED, CHILD_ERROR, PROGRESS_MIN_TIME,
16-format_test_result, TestResult, is_failed)
16+format_test_result, TestResult, is_failed, TIMEOUT)
1717from test.libregrtest.setup import setup_tests
1818from test.libregrtest.utils import format_duration
1919@@ -103,11 +103,12 @@ class ExitThread(Exception):
103103104104105105class MultiprocessThread(threading.Thread):
106-def __init__(self, pending, output, ns):
106+def __init__(self, pending, output, ns, timeout):
107107super().__init__()
108108self.pending = pending
109109self.output = output
110110self.ns = ns
111+self.timeout = timeout
111112self.current_test_name = None
112113self.start_time = None
113114self._popen = None
@@ -126,6 +127,12 @@ def __repr__(self):
126127return '<%s>' % ' '.join(info)
127128128129def kill(self):
130+"""
131+ Kill the current process (if any).
132+133+ This method can be called by the thread running the process,
134+ or by another thread.
135+ """
129136self._killed = True
130137131138popen = self._popen
@@ -136,6 +143,13 @@ def kill(self):
136143# does not hang
137144popen.stdout.close()
138145popen.stderr.close()
146+popen.wait()
147+148+def mp_result_error(self, test_name, error_type, stdout='', stderr='',
149+err_msg=None):
150+test_time = time.monotonic() - self.start_time
151+result = TestResult(test_name, error_type, test_time, None)
152+return MultiprocessResult(result, stdout, stderr, err_msg)
139153140154def _runtest(self, test_name):
141155try:
@@ -154,7 +168,19 @@ def _runtest(self, test_name):
154168raise ExitThread
155169156170try:
171+stdout, stderr = popen.communicate(timeout=self.timeout)
172+except subprocess.TimeoutExpired:
173+if self._killed:
174+# kill() has been called: communicate() fails
175+# on reading closed stdout/stderr
176+raise ExitThread
177+178+popen.kill()
157179stdout, stderr = popen.communicate()
180+self.kill()
181+182+return self.mp_result_error(test_name, TIMEOUT,
183+stdout, stderr)
158184except OSError:
159185if self._killed:
160186# kill() has been called: communicate() fails
@@ -163,7 +189,6 @@ def _runtest(self, test_name):
163189raise
164190except:
165191self.kill()
166-popen.wait()
167192raise
168193169194retcode = popen.wait()
@@ -191,8 +216,7 @@ def _runtest(self, test_name):
191216err_msg = "Failed to parse worker JSON: %s" % exc
192217193218if err_msg is not None:
194-test_time = time.monotonic() - self.start_time
195-result = TestResult(test_name, CHILD_ERROR, test_time, None)
219+return self.mp_result_error(test_name, CHILD_ERROR, stdout, stderr, err_msg)
196220197221return MultiprocessResult(result, stdout, stderr, err_msg)
198222@@ -236,13 +260,16 @@ def __init__(self, regrtest):
236260self.output = queue.Queue()
237261self.pending = MultiprocessIterator(self.regrtest.tests)
238262if self.ns.timeout is not None:
239-self.test_timeout = self.ns.timeout * 1.5
263+self.worker_timeout = self.ns.timeout * 1.5
264+self.main_timeout = self.ns.timeout * 2.0
240265else:
241-self.test_timeout = None
266+self.worker_timeout = None
267+self.main_timeout = None
242268self.workers = None
243269244270def start_workers(self):
245-self.workers = [MultiprocessThread(self.pending, self.output, self.ns)
271+self.workers = [MultiprocessThread(self.pending, self.output,
272+self.ns, self.worker_timeout)
246273for _ in range(self.ns.use_mp)]
247274print("Run tests in parallel using %s child processes"
248275% len(self.workers))
@@ -274,8 +301,8 @@ def _get_result(self):
274301return None
275302276303while True:
277-if self.test_timeout is not None:
278-faulthandler.dump_traceback_later(self.test_timeout, exit=True)
304+if self.main_timeout is not None:
305+faulthandler.dump_traceback_later(self.main_timeout, exit=True)
279306280307# wait for a thread
281308timeout = max(PROGRESS_UPDATE, PROGRESS_MIN_TIME)
@@ -343,7 +370,7 @@ def run_tests(self):
343370print()
344371self.regrtest.interrupted = True
345372finally:
346-if self.test_timeout is not None:
373+if self.main_timeout is not None:
347374faulthandler.cancel_dump_traceback_later()
348375349376# a test failed (and --failfast is set) or all tests completed