bpo-37531: regrtest ignores output on timeout (GH-16659) · python/cpython@359a197

@@ -22,6 +22,12 @@

2222

PROGRESS_UPDATE = 30.0 # seconds

2323

assert PROGRESS_UPDATE >= PROGRESS_MIN_TIME

242425+

# Kill the main process after 5 minutes. It is supposed to write an update

26+

# every PROGRESS_UPDATE seconds. Tolerate 5 minutes for Python slowest

27+

# buildbot workers.

28+

MAIN_PROCESS_TIMEOUT = 5 * 60.0

29+

assert MAIN_PROCESS_TIMEOUT >= PROGRESS_UPDATE

30+2531

# Time to wait until a worker completes: should be immediate

2632

JOIN_TIMEOUT = 30.0 # seconds

2733

@@ -160,20 +166,6 @@ def mp_result_error(self, test_name, error_type, stdout='', stderr='',

160166

result = TestResult(test_name, error_type, test_time, None)

161167

return MultiprocessResult(result, stdout, stderr, err_msg)

162168163-

def _timedout(self, test_name):

164-

self._kill()

165-166-

stdout = stderr = ''

167-

popen = self._popen

168-

try:

169-

stdout, stderr = popen.communicate(timeout=JOIN_TIMEOUT)

170-

except (subprocess.TimeoutExpired, OSError) as exc:

171-

print_warning(f"Failed to read {self} output "

172-

f"(timeout={format_duration(JOIN_TIMEOUT)}): "

173-

f"{exc!r}")

174-175-

return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)

176-177169

def _run_process(self, test_name):

178170

self.start_time = time.monotonic()

179171

@@ -197,23 +189,32 @@ def _run_process(self, test_name):

197189198190

try:

199191

stdout, stderr = popen.communicate(timeout=self.timeout)

192+

retcode = popen.returncode

193+

assert retcode is not None

200194

except subprocess.TimeoutExpired:

201195

if self._stopped:

202196

# kill() has been called: communicate() fails

203197

# on reading closed stdout/stderr

204198

raise ExitThread

205199206-

return self._timedout(test_name)

200+

# On timeout, kill the process

201+

self._kill()

202+203+

# None means TIMEOUT for the caller

204+

retcode = None

205+

# bpo-38207: Don't attempt to call communicate() again: on it

206+

# can hang until all child processes using stdout and stderr

207+

# pipes completes.

208+

stdout = stderr = ''

207209

except OSError:

208210

if self._stopped:

209211

# kill() has been called: communicate() fails

210212

# on reading closed stdout/stderr

211213

raise ExitThread

212214

raise

213-214-

retcode = popen.returncode

215-

stdout = stdout.strip()

216-

stderr = stderr.rstrip()

215+

else:

216+

stdout = stdout.strip()

217+

stderr = stderr.rstrip()

217218218219

return (retcode, stdout, stderr)

219220

except:

@@ -225,13 +226,10 @@ def _run_process(self, test_name):

225226

self.current_test_name = None

226227227228

def _runtest(self, test_name):

228-

result = self._run_process(test_name)

229-230-

if isinstance(result, MultiprocessResult):

231-

# _timedout() case

232-

return result

229+

retcode, stdout, stderr = self._run_process(test_name)

233230234-

retcode, stdout, stderr = result

231+

if retcode is None:

232+

return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)

235233236234

err_msg = None

237235

if retcode != 0:

@@ -250,7 +248,8 @@ def _runtest(self, test_name):

250248

err_msg = "Failed to parse worker JSON: %s" % exc

251249252250

if err_msg is not None:

253-

return self.mp_result_error(test_name, CHILD_ERROR, stdout, stderr, err_msg)

251+

return self.mp_result_error(test_name, CHILD_ERROR,

252+

stdout, stderr, err_msg)

254253255254

return MultiprocessResult(result, stdout, stderr, err_msg)

256255

@@ -289,6 +288,16 @@ def _wait_completed(self):

289288

f"{exc!r}")

290289291290

def wait_stopped(self, start_time):

291+

# bpo-38207: MultiprocessTestRunner.stop_workers() called self.stop()

292+

# which killed the process. Sometimes, killing the process from the

293+

# main thread does not interrupt popen.communicate() in

294+

# TestWorkerProcess thread. This loop with a timeout is a workaround

295+

# for that.

296+

#

297+

# Moreover, if this method fails to join the thread, it is likely

298+

# that Python will hang at exit while calling threading._shutdown()

299+

# which tries again to join the blocked thread. Regrtest.main()

300+

# uses EXIT_TIMEOUT to workaround this second bug.

292301

while True:

293302

# Write a message every second

294303

self.join(1.0)

@@ -355,7 +364,8 @@ def _get_result(self):

355364

timeout = PROGRESS_UPDATE

356365

while True:

357366

if use_faulthandler:

358-

faulthandler.dump_traceback_later(timeout * 2.0, exit=True)

367+

faulthandler.dump_traceback_later(MAIN_PROCESS_TIMEOUT,

368+

exit=True)

359369360370

# wait for a thread

361371

try: