Auto merge of #128254 - Amanieu:orig-binary-search, r=tgross35 · model-checking/verify-rust-std@e13d132
@@ -7,7 +7,7 @@
77#![stable(feature = "rust1", since = "1.0.0")]
8899use crate::cmp::Ordering::{self, Equal, Greater, Less};
10-use crate::intrinsics::{exact_div, unchecked_sub};
10+use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub};
1111use crate::mem::{self, SizedTypeProperties};
1212use crate::num::NonZero;
1313use crate::ops::{Bound, OneSidedRange, Range, RangeBounds};
@@ -2770,41 +2770,54 @@ impl<T> [T] {
27702770where
27712771F: FnMut(&'a T) -> Ordering,
27722772{
2773-// INVARIANTS:
2774-// - 0 <= left <= left + size = right <= self.len()
2775-// - f returns Less for everything in self[..left]
2776-// - f returns Greater for everything in self[right..]
27772773let mut size = self.len();
2778-let mut left = 0;
2779-let mut right = size;
2780-while left < right {
2781-let mid = left + size / 2;
2782-2783-// SAFETY: the while condition means `size` is strictly positive, so
2784-// `size/2 < size`. Thus `left + size/2 < left + size`, which
2785-// coupled with the `left + size <= self.len()` invariant means
2786-// we have `left + size/2 < self.len()`, and this is in-bounds.
2774+if size == 0 {
2775+return Err(0);
2776+}
2777+let mut base = 0usize;
2778+2779+// This loop intentionally doesn't have an early exit if the comparison
2780+// returns Equal. We want the number of loop iterations to depend *only*
2781+// on the size of the input slice so that the CPU can reliably predict
2782+// the loop count.
2783+while size > 1 {
2784+let half = size / 2;
2785+let mid = base + half;
2786+2787+// SAFETY: the call is made safe by the following inconstants:
2788+// - `mid >= 0`: by definition
2789+// - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...`
27872790let cmp = f(unsafe { self.get_unchecked(mid) });
278827912789-// This control flow produces conditional moves, which results in
2790-// fewer branches and instructions than if/else or matching on
2791-// cmp::Ordering.
2792-// This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx.
2793- left = if cmp == Less { mid + 1 } else { left };
2794- right = if cmp == Greater { mid } else { right };
2795-if cmp == Equal {
2796-// SAFETY: same as the `get_unchecked` above
2797-unsafe { hint::assert_unchecked(mid < self.len()) };
2798-return Ok(mid);
2799-}
2800-2801- size = right - left;
2792+// Binary search interacts poorly with branch prediction, so force
2793+// the compiler to use conditional moves if supported by the target
2794+// architecture.
2795+ base = select_unpredictable(cmp == Greater, base, mid);
2796+2797+// This is imprecise in the case where `size` is odd and the
2798+// comparison returns Greater: the mid element still gets included
2799+// by `size` even though it's known to be larger than the element
2800+// being searched for.
2801+//
2802+// This is fine though: we gain more performance by keeping the
2803+// loop iteration count invariant (and thus predictable) than we
2804+// lose from considering one additional element.
2805+ size -= half;
28022806}
280328072804-// SAFETY: directly true from the overall invariant.
2805-// Note that this is `<=`, unlike the assume in the `Ok` path.
2806-unsafe { hint::assert_unchecked(left <= self.len()) };
2807-Err(left)
2808+// SAFETY: base is always in [0, size) because base <= mid.
2809+let cmp = f(unsafe { self.get_unchecked(base) });
2810+if cmp == Equal {
2811+// SAFETY: same as the `get_unchecked` above.
2812+unsafe { hint::assert_unchecked(base < self.len()) };
2813+Ok(base)
2814+} else {
2815+let result = base + (cmp == Less) as usize;
2816+// SAFETY: same as the `get_unchecked` above.
2817+// Note that this is `<=`, unlike the assume in the `Ok` path.
2818+unsafe { hint::assert_unchecked(result <= self.len()) };
2819+Err(result)
2820+}
28082821}
2809282228102823/// Binary searches this slice with a key extraction function.