Merge tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull tasklist_lock optimizations from Christian Brauner:
 "According to the performance testbots this brings a 23% performance
  increase when creating new processes:

   - Reduce tasklist_lock hold time on exit:
       - Perform add_device_randomness() without tasklist_lock
       - Perform free_pid() calls outside of tasklist_lock

   - Drop irq disablement around pidmap_lock

   - Add some tasklist_lock asserts

   - Call flush_sigqueue() lockless by changing release_task()

   - Don't pointlessly clear TIF_SIGPENDING in __exit_signal() ->
     clear_tsk_thread_flag()"

* tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pid: drop irq disablement around pidmap_lock
  pid: perform free_pid() calls outside of tasklist_lock
  pid: sprinkle tasklist_lock asserts
  exit: hoist get_pid() in release_task() outside of tasklist_lock
  exit: perform add_device_randomness() without tasklist_lock
  exit: kill the pointless __exit_signal()->clear_tsk_thread_flag(TIF_SIGPENDING)
  exit: change the release_task() paths to call flush_sigqueue() lockless
Linus Torvalds
2025-03-24 13:39:27 -07:00
4 changed files with 93 additions and 66 deletions
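The central change on the exit side is a calling-convention change visible in the kernel/pid.c diff below: detach_pid(), change_pid() and __change_pid() no longer call free_pid() directly, but record each struct pid that lost its last user in a caller-provided array, which the caller hands to the new free_pids() helper after dropping tasklist_lock. A minimal sketch of that pattern, assuming kernel context — the caller shown here is illustrative, not the literal release_task()/__exit_signal() code in kernel/exit.c:

#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

/* Illustrative caller: detach under tasklist_lock, free the pids after. */
static void example_detach_and_free(struct task_struct *p)
{
	struct pid *post_pids[PIDTYPE_MAX] = { NULL };

	write_lock_irq(&tasklist_lock);
	/* __change_pid() now stashes any pid that lost its last user in
	 * the caller's array instead of calling free_pid() under the lock. */
	detach_pid(post_pids, p, PIDTYPE_PID);
	write_unlock_irq(&tasklist_lock);

	/* pidmap_lock is only taken here, after tasklist_lock is dropped,
	 * which is what allows pidmap_lock to stop disabling interrupts. */
	free_pids(post_pids);
}

Because free_pid() is now guaranteed to run without tasklist_lock held, the irq-disabled pidmap_lock variants become unnecessary — the second half of the series.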

kernel/pid.c

@@ -88,20 +88,6 @@ struct pid_namespace init_pid_ns = {
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
-/*
- * Note: disable interrupts while the pidmap_lock is held as an
- * interrupt might come in and do read_lock(&tasklist_lock).
- *
- * If we don't disable interrupts there is a nasty deadlock between
- * detach_pid()->free_pid() and another cpu that does
- * spin_lock(&pidmap_lock) followed by an interrupt routine that does
- * read_lock(&tasklist_lock);
- *
- * After we clean up the tasklist_lock and know there are no
- * irq handlers that take it we can leave the interrupts enabled.
- * For now it is easier to be safe than to prove it can't happen.
- */
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);
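The deleted comment deserves a concrete reading, since it is the whole reason the _irq/_irqsave lock variants existed. The hazard it described, written out as an illustrative interleaving (not code from the tree):

/*
 * Illustrative interleaving of the old deadlock, possible only while
 * free_pid() could still run under tasklist_lock:
 *
 *   CPU 0                               CPU 1
 *   -----                               -----
 *   spin_lock(&pidmap_lock);            write_lock_irq(&tasklist_lock);
 *   <interrupt arrives>                 detach_pid() -> free_pid()
 *     read_lock(&tasklist_lock);          spin_lock(&pidmap_lock);
 *     (spins: CPU 1 holds the            (spins: CPU 0 holds it, but is
 *      write lock)                        stuck in its irq handler)
 *
 * Neither CPU can make progress. Once free_pid() is never called with
 * tasklist_lock held -- enforced below by lockdep_assert_not_held() --
 * the cycle cannot form and a plain spin_lock() is sufficient.
 */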
@@ -128,11 +114,11 @@ static void delayed_put_pid(struct rcu_head *rhp)
 void free_pid(struct pid *pid)
 {
-	/* We can be called with write_lock_irq(&tasklist_lock) held */
 	int i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&pidmap_lock, flags);
+	lockdep_assert_not_held(&tasklist_lock);
+
+	spin_lock(&pidmap_lock);
 	for (i = 0; i <= pid->level; i++) {
 		struct upid *upid = pid->numbers + i;
 		struct pid_namespace *ns = upid->ns;
@@ -155,11 +141,23 @@ void free_pid(struct pid *pid)
 		idr_remove(&ns->idr, upid->nr);
 	}
 	pidfs_remove_pid(pid);
-	spin_unlock_irqrestore(&pidmap_lock, flags);
+	spin_unlock(&pidmap_lock);
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
+void free_pids(struct pid **pids)
+{
+	int tmp;
+
+	/*
+	 * This can batch pidmap_lock.
+	 */
+	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
+		if (pids[tmp])
+			free_pid(pids[tmp]);
+}
+
 struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 		size_t set_tid_size)
 {
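The "This can batch pidmap_lock" comment marks a follow-up opportunity that this commit leaves on the table: free_pids() still takes and releases pidmap_lock once per pid. A hypothetical batched variant, written as if added to kernel/pid.c — the __free_pid() helper does not exist in this diff and is named here purely for illustration:

/*
 * Hypothetical batched variant (NOT part of this commit): acquire
 * pidmap_lock once for the whole array. Assumes a __free_pid() helper
 * split out of free_pid() that expects pidmap_lock to be held.
 */
void free_pids_batched(struct pid **pids)
{
	int tmp;

	lockdep_assert_not_held(&tasklist_lock);

	spin_lock(&pidmap_lock);
	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
		if (pids[tmp])
			__free_pid(pids[tmp]);	/* hypothetical helper */
	spin_unlock(&pidmap_lock);
}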
@@ -211,7 +209,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 		}
 
 		idr_preload(GFP_KERNEL);
-		spin_lock_irq(&pidmap_lock);
+		spin_lock(&pidmap_lock);
 
 		if (tid) {
 			nr = idr_alloc(&tmp->idr, NULL, tid,
@@ -238,7 +236,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 			nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
 					      pid_max, GFP_ATOMIC);
 		}
-		spin_unlock_irq(&pidmap_lock);
+		spin_unlock(&pidmap_lock);
 		idr_preload_end();
 
 		if (nr < 0) {
@@ -272,7 +270,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 	upid = pid->numbers + ns->level;
 	idr_preload(GFP_KERNEL);
-	spin_lock_irq(&pidmap_lock);
+	spin_lock(&pidmap_lock);
 	if (!(ns->pid_allocated & PIDNS_ADDING))
 		goto out_unlock;
 	pidfs_add_pid(pid);
@@ -281,18 +279,18 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 		idr_replace(&upid->ns->idr, pid, upid->nr);
 		upid->ns->pid_allocated++;
 	}
-	spin_unlock_irq(&pidmap_lock);
+	spin_unlock(&pidmap_lock);
 	idr_preload_end();
 
 	return pid;
 
 out_unlock:
-	spin_unlock_irq(&pidmap_lock);
+	spin_unlock(&pidmap_lock);
 	idr_preload_end();
 	put_pid_ns(ns);
 
 out_free:
-	spin_lock_irq(&pidmap_lock);
+	spin_lock(&pidmap_lock);
 	while (++i <= ns->level) {
 		upid = pid->numbers + i;
 		idr_remove(&upid->ns->idr, upid->nr);
@@ -302,7 +300,7 @@ out_free:
 	if (ns->pid_allocated == PIDNS_ADDING)
 		idr_set_cursor(&ns->idr, 0);
 
-	spin_unlock_irq(&pidmap_lock);
+	spin_unlock(&pidmap_lock);
 
 	kmem_cache_free(ns->pid_cachep, pid);
 	return ERR_PTR(retval);
@@ -310,9 +308,9 @@ out_free:
 void disable_pid_allocation(struct pid_namespace *ns)
 {
-	spin_lock_irq(&pidmap_lock);
+	spin_lock(&pidmap_lock);
 	ns->pid_allocated &= ~PIDNS_ADDING;
-	spin_unlock_irq(&pidmap_lock);
+	spin_unlock(&pidmap_lock);
 }
 
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
@@ -339,17 +337,23 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
  */
 void attach_pid(struct task_struct *task, enum pid_type type)
 {
-	struct pid *pid = *task_pid_ptr(task, type);
+	struct pid *pid;
 
+	lockdep_assert_held_write(&tasklist_lock);
+
+	pid = *task_pid_ptr(task, type);
 	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
 }
 
-static void __change_pid(struct task_struct *task, enum pid_type type,
-			struct pid *new)
+static void __change_pid(struct pid **pids, struct task_struct *task,
+			 enum pid_type type, struct pid *new)
 {
-	struct pid **pid_ptr = task_pid_ptr(task, type);
-	struct pid *pid;
+	struct pid **pid_ptr, *pid;
 	int tmp;
 
+	lockdep_assert_held_write(&tasklist_lock);
+
+	pid_ptr = task_pid_ptr(task, type);
 	pid = *pid_ptr;
 
 	hlist_del_rcu(&task->pid_links[type]);
@@ -364,18 +368,19 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
 		if (pid_has_task(pid, tmp))
 			return;
 
-	free_pid(pid);
+	WARN_ON(pids[type]);
+	pids[type] = pid;
 }
 
-void detach_pid(struct task_struct *task, enum pid_type type)
+void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type type)
 {
-	__change_pid(task, type, NULL);
+	__change_pid(pids, task, type, NULL);
 }
 
-void change_pid(struct task_struct *task, enum pid_type type,
+void change_pid(struct pid **pids, struct task_struct *task, enum pid_type type,
 		struct pid *pid)
 {
-	__change_pid(task, type, pid);
+	__change_pid(pids, task, type, pid);
 	attach_pid(task, type);
 }
@@ -386,6 +391,8 @@ void exchange_tids(struct task_struct *left, struct task_struct *right)
 	struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
 	struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
 
+	lockdep_assert_held_write(&tasklist_lock);
+
 	/* Swap the single entry tid lists */
 	hlists_swap_heads_rcu(head1, head2);
@@ -403,6 +410,7 @@ void transfer_pid(struct task_struct *old, struct task_struct *new,
 			   enum pid_type type)
 {
 	WARN_ON_ONCE(type == PIDTYPE_PID);
+	lockdep_assert_held_write(&tasklist_lock);
 	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
 }
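The lockdep asserts sprinkled above are cheap — they compile away entirely without lockdep — but they turn a missed-lock bug into an immediate, pinpointed splat rather than silent corruption of the pid hash lists. A hypothetical misuse, for illustration:

/* Hypothetical buggy caller, for illustration only. */
static void buggy_attach(struct task_struct *task)
{
	/*
	 * Missing write_lock_irq(&tasklist_lock) here: with lockdep
	 * enabled, the lockdep_assert_held_write() in attach_pid()
	 * emits a warning at this call site the first time it runs.
	 */
	attach_pid(task, PIDTYPE_PID);
}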