bpo-37531: regrtest ignores output on timeout (GH-16659) · python/cpython@598bfa4

@@ -22,6 +22,12 @@

2222

PROGRESS_UPDATE = 30.0 # seconds

2323

assert PROGRESS_UPDATE >= PROGRESS_MIN_TIME

242425+

# Kill the main process after 5 minutes. It is supposed to write an update

26+

# every PROGRESS_UPDATE seconds. Tolerate 5 minutes for Python slowest

27+

# buildbot workers.

28+

MAIN_PROCESS_TIMEOUT = 5 * 60.0

29+

assert MAIN_PROCESS_TIMEOUT >= PROGRESS_UPDATE

30+2531

# Time to wait until a worker completes: should be immediate

2632

JOIN_TIMEOUT = 30.0 # seconds

2733

@@ -159,20 +165,6 @@ def mp_result_error(self, test_name, error_type, stdout='', stderr='',

159165

result = TestResult(test_name, error_type, test_time, None)

160166

return MultiprocessResult(result, stdout, stderr, err_msg)

161167162-

def _timedout(self, test_name):

163-

self._kill()

164-165-

stdout = stderr = ''

166-

popen = self._popen

167-

try:

168-

stdout, stderr = popen.communicate(timeout=JOIN_TIMEOUT)

169-

except (subprocess.TimeoutExpired, OSError) as exc:

170-

print_warning(f"Failed to read {self} output "

171-

f"(timeout={format_duration(JOIN_TIMEOUT)}): "

172-

f"{exc!r}")

173-174-

return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)

175-176168

def _run_process(self, test_name):

177169

self.start_time = time.monotonic()

178170

@@ -195,23 +187,32 @@ def _run_process(self, test_name):

195187196188

try:

197189

stdout, stderr = popen.communicate(timeout=self.timeout)

190+

retcode = popen.returncode

191+

assert retcode is not None

198192

except subprocess.TimeoutExpired:

199193

if self._stopped:

200194

# kill() has been called: communicate() fails

201195

# on reading closed stdout/stderr

202196

raise ExitThread

203197204-

return self._timedout(test_name)

198+

# On timeout, kill the process

199+

self._kill()

200+201+

# None means TIMEOUT for the caller

202+

retcode = None

203+

# bpo-38207: Don't attempt to call communicate() again: on it

204+

# can hang until all child processes using stdout and stderr

205+

# pipes completes.

206+

stdout = stderr = ''

205207

except OSError:

206208

if self._stopped:

207209

# kill() has been called: communicate() fails

208210

# on reading closed stdout/stderr

209211

raise ExitThread

210212

raise

211-212-

retcode = popen.returncode

213-

stdout = stdout.strip()

214-

stderr = stderr.rstrip()

213+

else:

214+

stdout = stdout.strip()

215+

stderr = stderr.rstrip()

215216216217

return (retcode, stdout, stderr)

217218

except:

@@ -223,13 +224,10 @@ def _run_process(self, test_name):

223224

self.current_test_name = None

224225225226

def _runtest(self, test_name):

226-

result = self._run_process(test_name)

227-228-

if isinstance(result, MultiprocessResult):

229-

# _timedout() case

230-

return result

227+

retcode, stdout, stderr = self._run_process(test_name)

231228232-

retcode, stdout, stderr = result

229+

if retcode is None:

230+

return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)

233231234232

err_msg = None

235233

if retcode != 0:

@@ -248,7 +246,8 @@ def _runtest(self, test_name):

248246

err_msg = "Failed to parse worker JSON: %s" % exc

249247250248

if err_msg is not None:

251-

return self.mp_result_error(test_name, CHILD_ERROR, stdout, stderr, err_msg)

249+

return self.mp_result_error(test_name, CHILD_ERROR,

250+

stdout, stderr, err_msg)

252251253252

return MultiprocessResult(result, stdout, stderr, err_msg)

254253

@@ -287,6 +286,16 @@ def _wait_completed(self):

287286

f"{exc!r}")

288287289288

def wait_stopped(self, start_time):

289+

# bpo-38207: MultiprocessTestRunner.stop_workers() called self.stop()

290+

# which killed the process. Sometimes, killing the process from the

291+

# main thread does not interrupt popen.communicate() in

292+

# TestWorkerProcess thread. This loop with a timeout is a workaround

293+

# for that.

294+

#

295+

# Moreover, if this method fails to join the thread, it is likely

296+

# that Python will hang at exit while calling threading._shutdown()

297+

# which tries again to join the blocked thread. Regrtest.main()

298+

# uses EXIT_TIMEOUT to workaround this second bug.

290299

while True:

291300

# Write a message every second

292301

self.join(1.0)

@@ -353,7 +362,8 @@ def _get_result(self):

353362

timeout = PROGRESS_UPDATE

354363

while True:

355364

if use_faulthandler:

356-

faulthandler.dump_traceback_later(timeout * 2.0, exit=True)

365+

faulthandler.dump_traceback_later(MAIN_PROCESS_TIMEOUT,

366+

exit=True)

357367358368

# wait for a thread

359369

try: