Rollup merge of #151346 - folkertdev:simd-splat, r=workingjubilee

add `simd_splat` intrinsic

Add `simd_splat` which lowers to the LLVM canonical splat sequence.

```llvm
insertelement <N x elem> poison, elem %x, i32 0
shufflevector <N x elem> v0, <N x elem> poison, <N x i32> zeroinitializer
```

Right now we try to fake it using one of

```rust
fn splat(x: u32) -> u32x8 {
    u32x8::from_array([x; 8])
}
```

or (in `stdarch`)

```rust
fn splat(value: $elem_type) -> $name {
    #[derive(Copy, Clone)]
    #[repr(simd)]
    struct JustOne([$elem_type; 1]);

    let one = JustOne([value]);

    // SAFETY: 0 is always in-bounds because we're shuffling
    // a simd type with exactly one element.
    unsafe { simd_shuffle!(one, one, [0; $len]) }
}
```

Both of these can confuse the LLVM optimizer, producing sub-par code. Some examples:

- https://github.com/rust-lang/rust/issues/60637
- https://github.com/rust-lang/rust/issues/137407
- https://github.com/rust-lang/rust/issues/122623
- https://github.com/rust-lang/rust/issues/97804

---

As far as I can tell there is no way to provide a fallback implementation for this intrinsic, because there is no `const` way of evaluating the number of elements (there might be issues beyond that, too). So, I added implementations for all 4 backends.

Both GCC and const-eval appear to have some issues with simd vectors containing pointers. I have a workaround for GCC, but haven't yet been able to make const-eval work. See the comments below.

Currently this just adds the intrinsic; it does not actually use it anywhere yet.
2026-01-24 23:17:31 +00:00 · 2026-01-24 21:04:15 +01:00
parent 99c44496ed 71f34429ac
commit 3a69035338
10 changed files with 192 additions and 1 deletions
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -348,6 +348,31 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
            ret.write_cvalue(fx, ret_lane);
        }

+        sym::simd_splat => {
+            intrinsic_args!(fx, args => (value); intrinsic);
+
+            if !ret.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, ret.layout().ty);
+                return;
+            }
+            let (lane_count, lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+
+            if value.layout().ty != lane_ty {
+                fx.tcx.dcx().span_fatal(
+                    span,
+                    format!(
+                        "[simd_splat] expected element type {lane_ty:?}, got {got:?}",
+                        got = value.layout().ty
+                    ),
+                );
+            }
+
+            for i in 0..lane_count {
+                let ret_lane = ret.place_lane(fx, i.into());
+                ret_lane.write_cvalue(fx, value);
+            }
+        }
+
        sym::simd_neg
        | sym::simd_bswap
        | sym::simd_bitreverse
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -121,6 +121,42 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
        return Ok(bx.vector_select(vector_mask, arg1, args[2].immediate()));
    }

+    #[cfg(feature = "master")]
+    if name == sym::simd_splat {
+        let (out_len, out_ty) = require_simd2!(ret_ty, SimdReturn);
+
+        require!(
+            args[0].layout.ty == out_ty,
+            InvalidMonomorphization::ExpectedVectorElementType {
+                span,
+                name,
+                expected_element: out_ty,
+                vector_type: ret_ty,
+            }
+        );
+
+        let vec_ty = llret_ty.unqualified().dyncast_vector().expect("vector return type");
+        let elem_ty = vec_ty.get_element_type();
+
+        // Cast pointer type to usize (GCC does not support pointer SIMD vectors).
+        let value = args[0];
+        let scalar = if value.layout.ty.is_numeric() {
+            value.immediate()
+        } else if value.layout.ty.is_raw_ptr() {
+            bx.ptrtoint(value.immediate(), elem_ty)
+        } else {
+            return_error!(InvalidMonomorphization::UnsupportedOperation {
+                span,
+                name,
+                in_ty: ret_ty,
+                in_elem: value.layout.ty
+            });
+        };
+
+        let elements = vec![scalar; out_len as usize];
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &elements));
+    }
+
    // every intrinsic below takes a SIMD vector as its first argument
    require_simd!(
        args[0].layout.ty,
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1581,6 +1581,31 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
        return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
    }

+    if name == sym::simd_splat {
+        let (_out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
+
+        require!(
+            args[0].layout.ty == out_ty,
+            InvalidMonomorphization::ExpectedVectorElementType {
+                span,
+                name,
+                expected_element: out_ty,
+                vector_type: ret_ty,
+            }
+        );
+
+        // `insertelement <N x elem> poison, elem %x, i32 0`
+        let poison_vec = bx.const_poison(llret_ty);
+        let idx0 = bx.const_i32(0);
+        let v0 = bx.insert_element(poison_vec, args[0].immediate(), idx0);
+
+        // `shufflevector <N x elem> v0, <N x elem> poison, <N x i32> zeroinitializer`
+        // The mask is all zeros, so this splats lane 0 (which has our element in it).
+        let splat = bx.shuffle_vector(v0, poison_vec, bx.const_null(llret_ty));
+
+        return Ok(splat);
+    }
+
    // every intrinsic below takes a SIMD vector as its first argument
    let (in_len, in_elem) = require_simd!(args[0].layout.ty, SimdInput);
    let in_ty = args[0].layout.ty;
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -1074,8 +1074,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                if constant_ty.is_simd() {
                    // However, some SIMD types do not actually use the vector ABI
                    // (in particular, packed SIMD types do not). Ensure we exclude those.
+                    //
+                    // We also have to exclude vectors of pointers because `immediate_const_vector`
+                    // does not work for those.
                    let layout = bx.layout_of(constant_ty);
-                    if let BackendRepr::SimdVector { .. } = layout.backend_repr {
+                    let (_, element_ty) = constant_ty.simd_size_and_type(bx.tcx());
+                    if let BackendRepr::SimdVector { .. } = layout.backend_repr
+                        && element_ty.is_numeric()
+                    {
                        let (llval, ty) = self.immediate_const_vector(bx, constant);
                        return OperandRef {
                            val: OperandValue::Immediate(llval),
--- a/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs
+++ b/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs
@@ -61,6 +61,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                }
                self.copy_op(&self.project_index(&input, index)?, &dest)?;
            }
+            sym::simd_splat => {
+                let elem = &args[0];
+                let (dest, dest_len) = self.project_to_simd(&dest)?;
+
+                for i in 0..dest_len {
+                    let place = self.project_index(&dest, i)?;
+                    self.copy_op(elem, &place)?;
+                }
+            }
            sym::simd_neg
            | sym::simd_fabs
            | sym::simd_ceil
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -746,6 +746,7 @@ pub(crate) fn check_intrinsic_type(
        sym::simd_extract | sym::simd_extract_dyn => {
            (2, 0, vec![param(0), tcx.types.u32], param(1))
        }
+        sym::simd_splat => (2, 0, vec![param(1)], param(0)),
        sym::simd_cast
        | sym::simd_as
        | sym::simd_cast_ptr
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -2141,6 +2141,7 @@ symbols! {
        simd_shr,
        simd_shuffle,
        simd_shuffle_const_generic,
+        simd_splat,
        simd_sub,
        simd_trunc,
        simd_with_exposed_provenance,
--- a/library/core/src/intrinsics/simd.rs
+++ b/library/core/src/intrinsics/simd.rs
@@ -52,6 +52,13 @@ pub const unsafe fn simd_insert_dyn<T, U>(x: T, idx: u32, val: U) -> T;
 #[rustc_intrinsic]
 pub const unsafe fn simd_extract_dyn<T, U>(x: T, idx: u32) -> U;

+/// Creates a vector where every lane has the provided value.
+///
+/// `T` must be a vector with element type `U`.
+#[rustc_nounwind]
+#[rustc_intrinsic]
+pub const unsafe fn simd_splat<T, U>(value: U) -> T;
+
 /// Adds two simd vectors elementwise.
 ///
 /// `T` must be a vector of integers or floats.
--- a/tests/codegen-llvm/simd/splat.rs
+++ b/tests/codegen-llvm/simd/splat.rs
@@ -0,0 +1,33 @@
+//@ compile-flags: -Copt-level=3
+#![crate_type = "lib"]
+#![no_std]
+#![feature(repr_simd, core_intrinsics)]
+use core::intrinsics::simd::simd_splat;
+
+#[path = "../../auxiliary/minisimd.rs"]
+mod minisimd;
+use minisimd::*;
+
+// Test that `simd_splat` produces the canonical LLVM splat sequence.
+
+#[no_mangle]
+unsafe fn int(x: u16) -> u16x2 {
+    // CHECK-LABEL: int
+    // CHECK: start:
+    // CHECK-NEXT: %0 = insertelement <2 x i16> poison, i16 %x, i64 0
+    // CHECK-NEXT: %1 = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> zeroinitializer
+    // CHECK-NEXT: store
+    // CHECK-NEXT: ret
+    simd_splat(x)
+}
+
+#[no_mangle]
+unsafe fn float(x: f32) -> f32x4 {
+    // CHECK-LABEL: float
+    // CHECK: start:
+    // CHECK-NEXT: %0 = insertelement <4 x float> poison, float %x, i64 0
+    // CHECK-NEXT: %1 = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
+    // CHECK-NEXT: store
+    // CHECK-NEXT: ret
+    simd_splat(x)
+}
--- a/tests/ui/simd/intrinsic/splat.rs
+++ b/tests/ui/simd/intrinsic/splat.rs
@@ -0,0 +1,48 @@
+//@ run-pass
+#![feature(repr_simd, core_intrinsics)]
+
+#[path = "../../../auxiliary/minisimd.rs"]
+mod minisimd;
+use minisimd::*;
+
+use std::intrinsics::simd::simd_splat;
+
+fn main() {
+    unsafe {
+        let x: Simd<u32, 1> = simd_splat(123u32);
+        let y: Simd<u32, 1> = const { simd_splat(123u32) };
+        assert_eq!(x.into_array(), [123; 1]);
+        assert_eq!(x.into_array(), y.into_array());
+
+        let x: u16x2 = simd_splat(42u16);
+        let y: u16x2 = const { simd_splat(42u16) };
+        assert_eq!(x.into_array(), [42; 2]);
+        assert_eq!(x.into_array(), y.into_array());
+
+        let x: u128x4 = simd_splat(42u128);
+        let y: u128x4 = const { simd_splat(42u128) };
+        assert_eq!(x.into_array(), [42; 4]);
+        assert_eq!(x.into_array(), y.into_array());
+
+        let x: i32x4 = simd_splat(-7i32);
+        let y: i32x4 = const { simd_splat(-7i32) };
+        assert_eq!(x.into_array(), [-7; 4]);
+        assert_eq!(x.into_array(), y.into_array());
+
+        let x: f32x4 = simd_splat(42.0f32);
+        let y: f32x4 = const { simd_splat(42.0f32) };
+        assert_eq!(x.into_array(), [42.0; 4]);
+        assert_eq!(x.into_array(), y.into_array());
+
+        let x: f64x2 = simd_splat(42.0f64);
+        let y: f64x2 = const { simd_splat(42.0f64) };
+        assert_eq!(x.into_array(), [42.0; 2]);
+        assert_eq!(x.into_array(), y.into_array());
+
+        static ZERO: u8 = 0u8;
+        let x: Simd<*const u8, 2> = simd_splat(&raw const ZERO);
+        let y: Simd<*const u8, 2> = const { simd_splat(&raw const ZERO) };
+        assert_eq!(x.into_array(), [&raw const ZERO; 2]);
+        assert_eq!(x.into_array(), y.into_array());
+    }
+}