partially revert 8d597aa365

(cherry picked from commit 5e4168b162)
2026-01-25 07:48:44 +00:00 · 2026-01-03 21:36:43 +08:00
parent c5b220e1de
commit d851fe6684
2 changed files with 72 additions and 0 deletions
--- a/src/tools/miri/src/shims/x86/avx2.rs
+++ b/src/tools/miri/src/shims/x86/avx2.rs
@@ -245,6 +245,42 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

                shift_simd_by_scalar(this, left, right, which, dest)?;
            }
+            // Used to implement the _mm256_madd_epi16 function.
+            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
+            // intermediate signed 32-bit integers. Horizontally add adjacent pairs of
+            // intermediate 32-bit integers, and pack the results in `dest`.
+            "pmadd.wd" => {
+                let [left, right] =
+                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
+
+                let (left, left_len) = this.project_to_simd(left)?;
+                let (right, right_len) = this.project_to_simd(right)?;
+                let (dest, dest_len) = this.project_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(dest_len.strict_mul(2), left_len);
+
+                for i in 0..dest_len {
+                    let j1 = i.strict_mul(2);
+                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
+                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
+
+                    let j2 = j1.strict_add(1);
+                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
+                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
+
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Multiplications are i16*i16->i32, which will not overflow.
+                    let mul1 = i32::from(left1).strict_mul(right1.into());
+                    let mul2 = i32::from(left2).strict_mul(right2.into());
+                    // However, this addition can overflow in the most extreme case
+                    // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
+                    let res = mul1.wrapping_add(mul2);
+
+                    this.write_scalar(Scalar::from_i32(res), &dest)?;
+                }
+            }
            _ => return interp_ok(EmulateItemResult::NotSupported),
        }
        interp_ok(EmulateItemResult::NeedsReturn)
--- a/src/tools/miri/src/shims/x86/sse2.rs
+++ b/src/tools/miri/src/shims/x86/sse2.rs
@@ -278,6 +278,42 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                    this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
                }
            }
+            // Used to implement the _mm_madd_epi16 function.
+            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
+            // intermediate signed 32-bit integers. Horizontally add adjacent pairs of
+            // intermediate 32-bit integers, and pack the results in `dest`.
+            "pmadd.wd" => {
+                let [left, right] =
+                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
+
+                let (left, left_len) = this.project_to_simd(left)?;
+                let (right, right_len) = this.project_to_simd(right)?;
+                let (dest, dest_len) = this.project_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(dest_len.strict_mul(2), left_len);
+
+                for i in 0..dest_len {
+                    let j1 = i.strict_mul(2);
+                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
+                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
+
+                    let j2 = j1.strict_add(1);
+                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
+                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
+
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Multiplications are i16*i16->i32, which will not overflow.
+                    let mul1 = i32::from(left1).strict_mul(right1.into());
+                    let mul2 = i32::from(left2).strict_mul(right2.into());
+                    // However, this addition can overflow in the most extreme case
+                    // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
+                    let res = mul1.wrapping_add(mul2);
+
+                    this.write_scalar(Scalar::from_i32(res), &dest)?;
+                }
+            }
            _ => return interp_ok(EmulateItemResult::NotSupported),
        }
        interp_ok(EmulateItemResult::NeedsReturn)