bpo-35892: Fix mode() and add multimode() by rhettinger · Pull Request #12089

bpo-35892: Fix mode() and add multimode() by rhettinger · Pull Request #12089 · python/cpython

Expand Up @@ -17,6 +17,7 @@ median_high High median of data. median_grouped Median, or 50th percentile, of grouped data. mode Mode (most common value) of data. multimode List of modes (most common values of data) ================== =============================================
Calculate the arithmetic mean ("the average") of data: Expand Down Expand Up @@ -79,10 +80,9 @@ __all__ = [ 'StatisticsError', 'NormalDist', 'pstdev', 'pvariance', 'stdev', 'variance', 'median', 'median_low', 'median_high', 'median_grouped', 'mean', 'mode', 'harmonic_mean', 'fmean', 'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', ]
import collections import math import numbers import random Expand All @@ -92,8 +92,8 @@ from itertools import groupby from bisect import bisect_left, bisect_right from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum

from operator import itemgetter from collections import Counter
# === Exceptions ===
Expand Down Expand Up @@ -249,20 +249,6 @@ def _convert(value, T): raise

def _counts(data): # Generate a table of sorted (value, frequency) pairs. table = collections.Counter(iter(data)).most_common() if not table: return table # Extract the values with the highest frequency. maxfreq = table[0][1] for i in range(1, len(table)): if table[i][1] != maxfreq: table = table[:i] break return table

def _find_lteq(a, x): 'Locate the leftmost value exactly equal to x' i = bisect_left(a, x) Expand Down Expand Up @@ -334,9 +320,9 @@ def count(x): nonlocal n n += 1 return x total = math.fsum(map(count, data)) total = fsum(map(count, data)) else: total = math.fsum(data) total = fsum(data) try: return total / n except ZeroDivisionError: Expand Down Expand Up @@ -523,19 +509,38 @@ def mode(data): >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) 'red'
If there is not exactly one most common value, ``mode`` will raise StatisticsError. If there are multiple modes, return the first one encountered.
>>> mode(['red', 'red', 'green', 'blue', 'blue']) 'red'
If *data* is empty, ``mode``, raises StatisticsError.
""" # Generate a table of sorted (value, frequency) pairs. table = _counts(data) if len(table) == 1: return table[0][0] elif table: raise StatisticsError( 'no unique mode; found %d equally common values' % len(table) ) else: raise StatisticsError('no mode for empty data') data = iter(data) try: return Counter(data).most_common(1)[0][0] except IndexError: raise StatisticsError('no mode for empty data') from None

def multimode(data): """ Return a list of the most frequently occurring values.
Will return more than one result if there are multiple modes or an empty list if *data* is empty.
>>> multimode('aabbbbbbbbcc') ['b'] >>> multimode('aabbbbccddddeeffffgg') ['b', 'd', 'f'] >>> multimode('') []
""" counts = Counter(iter(data)).most_common() maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, [])) return list(map(itemgetter(0), mode_items))

# === Measures of spread === Expand Down Expand Up @@ -836,6 +841,7 @@ def __repr__(self): from math import isclose from operator import add, sub, mul, truediv from itertools import repeat import doctest
g1 = NormalDist(10, 20) g2 = NormalDist(-5, 25) Expand Down Expand Up @@ -893,3 +899,5 @@ def assert_close(G1, G2): S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n), Y.samples(n))]) assert_close(X - Y, S)
print(doctest.testmod())