Implement all avx2 intrinsics used by the image crate · rust-lang/rust@4381949

@@ -304,7 +304,9 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(

304304

fx.bcx.ins().sshr(a_lane, saturated_count)

305305

});

306306

}

307-

"llvm.x86.sse2.psad.bw" => {

307+

"llvm.x86.sse2.psad.bw" | "llvm.x86.avx2.psad.bw" => {

308+

// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8&ig_expand=5770

309+

// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8&ig_expand=5771

308310

intrinsic_args!(fx, args => (a, b); intrinsic);

309 311

310 312

assert_eq!(a.layout(), b.layout());

@@ -335,7 +337,9 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(

335337

ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);

336338

}

337339

}

338-

"llvm.x86.ssse3.pmadd.ub.sw.128" => {

340+

"llvm.x86.ssse3.pmadd.ub.sw.128" | "llvm.x86.avx2.pmadd.ub.sw" => {

341+

// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16&ig_expand=4267

342+

// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16&ig_expand=4270

339343

intrinsic_args!(fx, args => (a, b); intrinsic);

340 344

341 345

let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);

@@ -374,7 +378,9 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(

374378

ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);

375379

}

376380

}

377-

"llvm.x86.sse2.pmadd.wd" => {

381+

"llvm.x86.sse2.pmadd.wd" | "llvm.x86.avx2.pmadd.wd" => {

382+

// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16&ig_expand=4231

383+

// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16&ig_expand=4234

378384

intrinsic_args!(fx, args => (a, b); intrinsic);

379 385

380 386

assert_eq!(a.layout(), b.layout());