futex: Create hb scopes

Create explicit scopes for hb variables; almost pure re-indent.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250416162921.513656-6-bigeasy@linutronix.de
Peter Zijlstra authored on 2025-04-16 18:29:05 +02:00
commit 8486d12f55 (parent 2fb292096d)
4 changed files with 504 additions and 485 deletions
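
The pattern is the same in every hunk below: the function-wide futex_hash_bucket pointer declaration is dropped and the pointer is redeclared inside an explicit if (1) { ... } block around the code that actually holds the bucket lock. As a rough sketch of that shape (illustrative only, not lifted from the patch; the surrounding fragment is invented, while futex_hash(), futex_q_lock() and futex_q_unlock() are helpers used in several of the hunks), each conversion goes from

	struct futex_hash_bucket *hb;	/* function scope */
	...
	hb = futex_hash(&q->key);	/* look up the bucket */
	futex_q_lock(q, hb);		/* take hb->lock */
	/* ... work on the queue under hb->lock ... */
	futex_q_unlock(hb);

to

	if (1) {
		struct futex_hash_bucket *hb;	/* scoped to the locked region */

		hb = futex_hash(&q->key);
		futex_q_lock(q, hb);
		/* ... work on the queue under hb->lock ... */
		futex_q_unlock(hb);
	}

so hb is no longer visible outside the block that owns the lock, presumably so a follow-up patch can replace the if (1) block with a scoped guard without another large re-indent.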


@@ -944,7 +944,6 @@ static void exit_pi_state_list(struct task_struct *curr)
{
struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state;
struct futex_hash_bucket *hb;
union futex_key key = FUTEX_KEY_INIT;
/*
@@ -957,50 +956,54 @@ static void exit_pi_state_list(struct task_struct *curr)
next = head->next;
pi_state = list_entry(next, struct futex_pi_state, list);
key = pi_state->key;
hb = futex_hash(&key);
if (1) {
struct futex_hash_bucket *hb;
/*
* We can race against put_pi_state() removing itself from the
* list (a waiter going away). put_pi_state() will first
* decrement the reference count and then modify the list, so
* its possible to see the list entry but fail this reference
* acquire.
*
* In that case; drop the locks to let put_pi_state() make
* progress and retry the loop.
*/
if (!refcount_inc_not_zero(&pi_state->refcount)) {
hb = futex_hash(&key);
/*
* We can race against put_pi_state() removing itself from the
* list (a waiter going away). put_pi_state() will first
* decrement the reference count and then modify the list, so
* its possible to see the list entry but fail this reference
* acquire.
*
* In that case; drop the locks to let put_pi_state() make
* progress and retry the loop.
*/
if (!refcount_inc_not_zero(&pi_state->refcount)) {
raw_spin_unlock_irq(&curr->pi_lock);
cpu_relax();
raw_spin_lock_irq(&curr->pi_lock);
continue;
}
raw_spin_unlock_irq(&curr->pi_lock);
cpu_relax();
raw_spin_lock_irq(&curr->pi_lock);
continue;
}
raw_spin_unlock_irq(&curr->pi_lock);
spin_lock(&hb->lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
raw_spin_lock(&curr->pi_lock);
/*
* We dropped the pi-lock, so re-check whether this
* task still owns the PI-state:
*/
if (head->next != next) {
/* retain curr->pi_lock for the loop invariant */
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
spin_lock(&hb->lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
raw_spin_lock(&curr->pi_lock);
/*
* We dropped the pi-lock, so re-check whether this
* task still owns the PI-state:
*/
if (head->next != next) {
/* retain curr->pi_lock for the loop invariant */
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
put_pi_state(pi_state);
continue;
}
WARN_ON(pi_state->owner != curr);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
pi_state->owner = NULL;
raw_spin_unlock(&curr->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
put_pi_state(pi_state);
continue;
}
WARN_ON(pi_state->owner != curr);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
pi_state->owner = NULL;
raw_spin_unlock(&curr->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);


@@ -920,7 +920,6 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
struct hrtimer_sleeper timeout, *to;
struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
DEFINE_WAKE_Q(wake_q);
int res, ret;
@@ -939,152 +938,169 @@ retry:
goto out;
retry_private:
hb = futex_hash(&q.key);
futex_q_lock(&q, hb);
if (1) {
struct futex_hash_bucket *hb;
ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
&exiting, 0);
if (unlikely(ret)) {
/*
* Atomic work succeeded and we got the lock,
* or failed. Either way, we do _not_ block.
*/
switch (ret) {
case 1:
/* We got the lock. */
ret = 0;
goto out_unlock_put_key;
case -EFAULT:
goto uaddr_faulted;
case -EBUSY:
case -EAGAIN:
hb = futex_hash(&q.key);
futex_q_lock(&q, hb);
ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
&exiting, 0);
if (unlikely(ret)) {
/*
* Two reasons for this:
* - EBUSY: Task is exiting and we just wait for the
* exit to complete.
* - EAGAIN: The user space value changed.
* Atomic work succeeded and we got the lock,
* or failed. Either way, we do _not_ block.
*/
futex_q_unlock(hb);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
* this task might loop forever, aka. live lock.
*/
wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
goto out_unlock_put_key;
switch (ret) {
case 1:
/* We got the lock. */
ret = 0;
goto out_unlock_put_key;
case -EFAULT:
goto uaddr_faulted;
case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
* - EBUSY: Task is exiting and we just wait for the
* exit to complete.
* - EAGAIN: The user space value changed.
*/
futex_q_unlock(hb);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
* this task might loop forever, aka. live lock.
*/
wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
goto out_unlock_put_key;
}
}
}
WARN_ON(!q.pi_state);
WARN_ON(!q.pi_state);
/*
* Only actually queue now that the atomic ops are done:
*/
__futex_queue(&q, hb, current);
/*
* Only actually queue now that the atomic ops are done:
*/
__futex_queue(&q, hb, current);
if (trylock) {
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
/* Fixup the trylock return value: */
ret = ret ? 0 : -EWOULDBLOCK;
goto no_block;
}
if (trylock) {
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
/* Fixup the trylock return value: */
ret = ret ? 0 : -EWOULDBLOCK;
goto no_block;
}
/*
* Must be done before we enqueue the waiter, here is unfortunately
* under the hb lock, but that *should* work because it does nothing.
*/
rt_mutex_pre_schedule();
/*
* Must be done before we enqueue the waiter, here is unfortunately
* under the hb lock, but that *should* work because it does nothing.
*/
rt_mutex_pre_schedule();
rt_mutex_init_waiter(&rt_waiter);
rt_mutex_init_waiter(&rt_waiter);
/*
* On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
* hold it while doing rt_mutex_start_proxy(), because then it will
* include hb->lock in the blocking chain, even through we'll not in
* fact hold it while blocking. This will lead it to report -EDEADLK
* and BUG when futex_unlock_pi() interleaves with this.
*
* Therefore acquire wait_lock while holding hb->lock, but drop the
* latter before calling __rt_mutex_start_proxy_lock(). This
* interleaves with futex_unlock_pi() -- which does a similar lock
* handoff -- such that the latter can observe the futex_q::pi_state
* before __rt_mutex_start_proxy_lock() is done.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
spin_unlock(q.lock_ptr);
/*
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
* such that futex_unlock_pi() is guaranteed to observe the waiter when
* it sees the futex_q::pi_state.
*/
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);
/*
* On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
* hold it while doing rt_mutex_start_proxy(), because then it will
* include hb->lock in the blocking chain, even through we'll not in
* fact hold it while blocking. This will lead it to report -EDEADLK
* and BUG when futex_unlock_pi() interleaves with this.
*
* Therefore acquire wait_lock while holding hb->lock, but drop the
* latter before calling __rt_mutex_start_proxy_lock(). This
* interleaves with futex_unlock_pi() -- which does a similar lock
* handoff -- such that the latter can observe the futex_q::pi_state
* before __rt_mutex_start_proxy_lock() is done.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
spin_unlock(q.lock_ptr);
/*
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
* such that futex_unlock_pi() is guaranteed to observe the waiter when
* it sees the futex_q::pi_state.
*/
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);
if (ret) {
if (ret == 1)
ret = 0;
goto cleanup;
}
if (ret) {
if (ret == 1)
ret = 0;
goto cleanup;
}
if (unlikely(to))
hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
if (unlikely(to))
hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
cleanup:
/*
* If we failed to acquire the lock (deadlock/signal/timeout), we must
* unwind the above, however we canont lock hb->lock because
* rt_mutex already has a waiter enqueued and hb->lock can itself try
* and enqueue an rt_waiter through rtlock.
*
* Doing the cleanup without holding hb->lock can cause inconsistent
* state between hb and pi_state, but only in the direction of not
* seeing a waiter that is leaving.
*
* See futex_unlock_pi(), it deals with this inconsistency.
*
* There be dragons here, since we must deal with the inconsistency on
* the way out (here), it is impossible to detect/warn about the race
* the other way around (missing an incoming waiter).
*
* What could possibly go wrong...
*/
if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
ret = 0;
/*
* If we failed to acquire the lock (deadlock/signal/timeout), we must
* unwind the above, however we canont lock hb->lock because
* rt_mutex already has a waiter enqueued and hb->lock can itself try
* and enqueue an rt_waiter through rtlock.
*
* Doing the cleanup without holding hb->lock can cause inconsistent
* state between hb and pi_state, but only in the direction of not
* seeing a waiter that is leaving.
*
* See futex_unlock_pi(), it deals with this inconsistency.
*
* There be dragons here, since we must deal with the inconsistency on
* the way out (here), it is impossible to detect/warn about the race
* the other way around (missing an incoming waiter).
*
* What could possibly go wrong...
*/
if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
ret = 0;
/*
* Now that the rt_waiter has been dequeued, it is safe to use
* spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
* the
*/
spin_lock(q.lock_ptr);
/*
* Waiter is unqueued.
*/
rt_mutex_post_schedule();
/*
* Now that the rt_waiter has been dequeued, it is safe to use
* spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
* the
*/
spin_lock(q.lock_ptr);
/*
* Waiter is unqueued.
*/
rt_mutex_post_schedule();
no_block:
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
*/
res = fixup_pi_owner(uaddr, &q, !ret);
/*
* If fixup_pi_owner() returned an error, propagate that. If it acquired
* the lock, clear our -ETIMEDOUT or -EINTR.
*/
if (res)
ret = (res < 0) ? res : 0;
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
*/
res = fixup_pi_owner(uaddr, &q, !ret);
/*
* If fixup_pi_owner() returned an error, propagate that. If it acquired
* the lock, clear our -ETIMEDOUT or -EINTR.
*/
if (res)
ret = (res < 0) ? res : 0;
futex_unqueue_pi(&q);
spin_unlock(q.lock_ptr);
goto out;
futex_unqueue_pi(&q);
spin_unlock(q.lock_ptr);
goto out;
out_unlock_put_key:
futex_q_unlock(hb);
futex_q_unlock(hb);
goto out;
uaddr_faulted:
futex_q_unlock(hb);
ret = fault_in_user_writeable(uaddr);
if (ret)
goto out;
if (!(flags & FLAGS_SHARED))
goto retry_private;
goto retry;
}
out:
if (to) {
@@ -1092,18 +1108,6 @@ out:
destroy_hrtimer_on_stack(&to->timer);
}
return ret != -EINTR ? ret : -ERESTARTNOINTR;
uaddr_faulted:
futex_q_unlock(hb);
ret = fault_in_user_writeable(uaddr);
if (ret)
goto out;
if (!(flags & FLAGS_SHARED))
goto retry_private;
goto retry;
}
/*


@@ -371,7 +371,6 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int task_count = 0, ret;
struct futex_pi_state *pi_state = NULL;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
DEFINE_WAKE_Q(wake_q);
@@ -443,240 +442,244 @@ retry:
if (requeue_pi && futex_match(&key1, &key2))
return -EINVAL;
hb1 = futex_hash(&key1);
hb2 = futex_hash(&key2);
retry_private:
futex_hb_waiters_inc(hb2);
double_lock_hb(hb1, hb2);
if (1) {
struct futex_hash_bucket *hb1, *hb2;
if (likely(cmpval != NULL)) {
u32 curval;
hb1 = futex_hash(&key1);
hb2 = futex_hash(&key2);
ret = futex_get_value_locked(&curval, uaddr1);
futex_hb_waiters_inc(hb2);
double_lock_hb(hb1, hb2);
if (unlikely(ret)) {
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
if (likely(cmpval != NULL)) {
u32 curval;
ret = get_user(curval, uaddr1);
if (ret)
return ret;
ret = futex_get_value_locked(&curval, uaddr1);
if (!(flags1 & FLAGS_SHARED))
goto retry_private;
if (unlikely(ret)) {
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
goto retry;
}
if (curval != *cmpval) {
ret = -EAGAIN;
goto out_unlock;
}
}
ret = get_user(curval, uaddr1);
if (ret)
return ret;
if (requeue_pi) {
struct task_struct *exiting = NULL;
if (!(flags1 & FLAGS_SHARED))
goto retry_private;
/*
* Attempt to acquire uaddr2 and wake the top waiter. If we
* intend to requeue waiters, force setting the FUTEX_WAITERS
* bit. We force this here where we are able to easily handle
* faults rather in the requeue loop below.
*
* Updates topwaiter::requeue_state if a top waiter exists.
*/
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
&key2, &pi_state,
&exiting, nr_requeue);
/*
* At this point the top_waiter has either taken uaddr2 or
* is waiting on it. In both cases pi_state has been
* established and an initial refcount on it. In case of an
* error there's nothing.
*
* The top waiter's requeue_state is up to date:
*
* - If the lock was acquired atomically (ret == 1), then
* the state is Q_REQUEUE_PI_LOCKED.
*
* The top waiter has been dequeued and woken up and can
* return to user space immediately. The kernel/user
* space state is consistent. In case that there must be
* more waiters requeued the WAITERS bit in the user
* space futex is set so the top waiter task has to go
* into the syscall slowpath to unlock the futex. This
* will block until this requeue operation has been
* completed and the hash bucket locks have been
* dropped.
*
* - If the trylock failed with an error (ret < 0) then
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
* happened", or Q_REQUEUE_PI_IGNORE when there was an
* interleaved early wakeup.
*
* - If the trylock did not succeed (ret == 0) then the
* state is either Q_REQUEUE_PI_IN_PROGRESS or
* Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
* This will be cleaned up in the loop below, which
* cannot fail because futex_proxy_trylock_atomic() did
* the same sanity checks for requeue_pi as the loop
* below does.
*/
switch (ret) {
case 0:
/* We hold a reference on the pi state. */
break;
case 1:
/*
* futex_proxy_trylock_atomic() acquired the user space
* futex. Adjust task_count.
*/
task_count++;
ret = 0;
break;
/*
* If the above failed, then pi_state is NULL and
* waiter::requeue_state is correct.
*/
case -EFAULT:
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
return ret;
case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
* - EBUSY: Owner is exiting and we just wait for the
* exit to complete.
* - EAGAIN: The user space value changed.
*/
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
* this task might loop forever, aka. live lock.
*/
wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
goto out_unlock;
}
}
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
if (task_count - nr_wake >= nr_requeue)
break;
if (!futex_match(&this->key, &key1))
continue;
/*
* FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
* be paired with each other and no other futex ops.
*
* We should never be requeueing a futex_q with a pi_state,
* which is awaiting a futex_unlock_pi().
*/
if ((requeue_pi && !this->rt_waiter) ||
(!requeue_pi && this->rt_waiter) ||
this->pi_state) {
ret = -EINVAL;
break;
}
if (curval != *cmpval) {
ret = -EAGAIN;
goto out_unlock;
}
}
/* Plain futexes just wake or requeue and are done */
if (!requeue_pi) {
if (++task_count <= nr_wake)
this->wake(&wake_q, this);
else
if (requeue_pi) {
struct task_struct *exiting = NULL;
/*
* Attempt to acquire uaddr2 and wake the top waiter. If we
* intend to requeue waiters, force setting the FUTEX_WAITERS
* bit. We force this here where we are able to easily handle
* faults rather in the requeue loop below.
*
* Updates topwaiter::requeue_state if a top waiter exists.
*/
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
&key2, &pi_state,
&exiting, nr_requeue);
/*
* At this point the top_waiter has either taken uaddr2 or
* is waiting on it. In both cases pi_state has been
* established and an initial refcount on it. In case of an
* error there's nothing.
*
* The top waiter's requeue_state is up to date:
*
* - If the lock was acquired atomically (ret == 1), then
* the state is Q_REQUEUE_PI_LOCKED.
*
* The top waiter has been dequeued and woken up and can
* return to user space immediately. The kernel/user
* space state is consistent. In case that there must be
* more waiters requeued the WAITERS bit in the user
* space futex is set so the top waiter task has to go
* into the syscall slowpath to unlock the futex. This
* will block until this requeue operation has been
* completed and the hash bucket locks have been
* dropped.
*
* - If the trylock failed with an error (ret < 0) then
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
* happened", or Q_REQUEUE_PI_IGNORE when there was an
* interleaved early wakeup.
*
* - If the trylock did not succeed (ret == 0) then the
* state is either Q_REQUEUE_PI_IN_PROGRESS or
* Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
* This will be cleaned up in the loop below, which
* cannot fail because futex_proxy_trylock_atomic() did
* the same sanity checks for requeue_pi as the loop
* below does.
*/
switch (ret) {
case 0:
/* We hold a reference on the pi state. */
break;
case 1:
/*
* futex_proxy_trylock_atomic() acquired the user space
* futex. Adjust task_count.
*/
task_count++;
ret = 0;
break;
/*
* If the above failed, then pi_state is NULL and
* waiter::requeue_state is correct.
*/
case -EFAULT:
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
return ret;
case -EBUSY:
case -EAGAIN:
/*
* Two reasons for this:
* - EBUSY: Owner is exiting and we just wait for the
* exit to complete.
* - EAGAIN: The user space value changed.
*/
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
* this task might loop forever, aka. live lock.
*/
wait_for_owner_exiting(ret, exiting);
cond_resched();
goto retry;
default:
goto out_unlock;
}
}
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
if (task_count - nr_wake >= nr_requeue)
break;
if (!futex_match(&this->key, &key1))
continue;
/*
* FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
* be paired with each other and no other futex ops.
*
* We should never be requeueing a futex_q with a pi_state,
* which is awaiting a futex_unlock_pi().
*/
if ((requeue_pi && !this->rt_waiter) ||
(!requeue_pi && this->rt_waiter) ||
this->pi_state) {
ret = -EINVAL;
break;
}
/* Plain futexes just wake or requeue and are done */
if (!requeue_pi) {
if (++task_count <= nr_wake)
this->wake(&wake_q, this);
else
requeue_futex(this, hb1, hb2, &key2);
continue;
}
/* Ensure we requeue to the expected futex for requeue_pi. */
if (!futex_match(this->requeue_pi_key, &key2)) {
ret = -EINVAL;
break;
}
/*
* Requeue nr_requeue waiters and possibly one more in the case
* of requeue_pi if we couldn't acquire the lock atomically.
*
* Prepare the waiter to take the rt_mutex. Take a refcount
* on the pi_state and store the pointer in the futex_q
* object of the waiter.
*/
get_pi_state(pi_state);
/* Don't requeue when the waiter is already on the way out. */
if (!futex_requeue_pi_prepare(this, pi_state)) {
/*
* Early woken waiter signaled that it is on the
* way out. Drop the pi_state reference and try the
* next waiter. @this->pi_state is still NULL.
*/
put_pi_state(pi_state);
continue;
}
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
this->task);
if (ret == 1) {
/*
* We got the lock. We do neither drop the refcount
* on pi_state nor clear this->pi_state because the
* waiter needs the pi_state for cleaning up the
* user space value. It will drop the refcount
* after doing so. this::requeue_state is updated
* in the wakeup as well.
*/
requeue_pi_wake_futex(this, &key2, hb2);
task_count++;
} else if (!ret) {
/* Waiter is queued, move it to hb2 */
requeue_futex(this, hb1, hb2, &key2);
continue;
}
/* Ensure we requeue to the expected futex for requeue_pi. */
if (!futex_match(this->requeue_pi_key, &key2)) {
ret = -EINVAL;
break;
futex_requeue_pi_complete(this, 0);
task_count++;
} else {
/*
* rt_mutex_start_proxy_lock() detected a potential
* deadlock when we tried to queue that waiter.
* Drop the pi_state reference which we took above
* and remove the pointer to the state from the
* waiters futex_q object.
*/
this->pi_state = NULL;
put_pi_state(pi_state);
futex_requeue_pi_complete(this, ret);
/*
* We stop queueing more waiters and let user space
* deal with the mess.
*/
break;
}
}
/*
* Requeue nr_requeue waiters and possibly one more in the case
* of requeue_pi if we couldn't acquire the lock atomically.
*
* Prepare the waiter to take the rt_mutex. Take a refcount
* on the pi_state and store the pointer in the futex_q
* object of the waiter.
* We took an extra initial reference to the pi_state in
* futex_proxy_trylock_atomic(). We need to drop it here again.
*/
get_pi_state(pi_state);
/* Don't requeue when the waiter is already on the way out. */
if (!futex_requeue_pi_prepare(this, pi_state)) {
/*
* Early woken waiter signaled that it is on the
* way out. Drop the pi_state reference and try the
* next waiter. @this->pi_state is still NULL.
*/
put_pi_state(pi_state);
continue;
}
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
this->task);
if (ret == 1) {
/*
* We got the lock. We do neither drop the refcount
* on pi_state nor clear this->pi_state because the
* waiter needs the pi_state for cleaning up the
* user space value. It will drop the refcount
* after doing so. this::requeue_state is updated
* in the wakeup as well.
*/
requeue_pi_wake_futex(this, &key2, hb2);
task_count++;
} else if (!ret) {
/* Waiter is queued, move it to hb2 */
requeue_futex(this, hb1, hb2, &key2);
futex_requeue_pi_complete(this, 0);
task_count++;
} else {
/*
* rt_mutex_start_proxy_lock() detected a potential
* deadlock when we tried to queue that waiter.
* Drop the pi_state reference which we took above
* and remove the pointer to the state from the
* waiters futex_q object.
*/
this->pi_state = NULL;
put_pi_state(pi_state);
futex_requeue_pi_complete(this, ret);
/*
* We stop queueing more waiters and let user space
* deal with the mess.
*/
break;
}
}
/*
* We took an extra initial reference to the pi_state in
* futex_proxy_trylock_atomic(). We need to drop it here again.
*/
put_pi_state(pi_state);
put_pi_state(pi_state);
out_unlock:
double_unlock_hb(hb1, hb2);
double_unlock_hb(hb1, hb2);
futex_hb_waiters_dec(hb2);
}
wake_up_q(&wake_q);
futex_hb_waiters_dec(hb2);
return ret ? ret : task_count;
}


@@ -253,7 +253,6 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
int nr_wake, int nr_wake2, int op)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
int ret, op_ret;
DEFINE_WAKE_Q(wake_q);
@@ -266,67 +265,71 @@ retry:
if (unlikely(ret != 0))
return ret;
hb1 = futex_hash(&key1);
hb2 = futex_hash(&key2);
retry_private:
double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
double_unlock_hb(hb1, hb2);
if (1) {
struct futex_hash_bucket *hb1, *hb2;
if (!IS_ENABLED(CONFIG_MMU) ||
unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
/*
* we don't get EFAULT from MMU faults if we don't have
* an MMU, but we might get them from range checking
*/
ret = op_ret;
return ret;
}
hb1 = futex_hash(&key1);
hb2 = futex_hash(&key2);
if (op_ret == -EFAULT) {
ret = fault_in_user_writeable(uaddr2);
if (ret)
double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
double_unlock_hb(hb1, hb2);
if (!IS_ENABLED(CONFIG_MMU) ||
unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
/*
* we don't get EFAULT from MMU faults if we don't have
* an MMU, but we might get them from range checking
*/
ret = op_ret;
return ret;
}
cond_resched();
if (!(flags & FLAGS_SHARED))
goto retry_private;
goto retry;
}
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
if (futex_match (&this->key, &key1)) {
if (this->pi_state || this->rt_waiter) {
ret = -EINVAL;
goto out_unlock;
}
this->wake(&wake_q, this);
if (++ret >= nr_wake)
break;
}
}
if (op_ret > 0) {
op_ret = 0;
plist_for_each_entry_safe(this, next, &hb2->chain, list) {
if (futex_match (&this->key, &key2)) {
if (op_ret == -EFAULT) {
ret = fault_in_user_writeable(uaddr2);
if (ret)
return ret;
}
cond_resched();
if (!(flags & FLAGS_SHARED))
goto retry_private;
goto retry;
}
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
if (futex_match(&this->key, &key1)) {
if (this->pi_state || this->rt_waiter) {
ret = -EINVAL;
goto out_unlock;
}
this->wake(&wake_q, this);
if (++op_ret >= nr_wake2)
if (++ret >= nr_wake)
break;
}
}
ret += op_ret;
}
if (op_ret > 0) {
op_ret = 0;
plist_for_each_entry_safe(this, next, &hb2->chain, list) {
if (futex_match(&this->key, &key2)) {
if (this->pi_state || this->rt_waiter) {
ret = -EINVAL;
goto out_unlock;
}
this->wake(&wake_q, this);
if (++op_ret >= nr_wake2)
break;
}
}
ret += op_ret;
}
out_unlock:
double_unlock_hb(hb1, hb2);
double_unlock_hb(hb1, hb2);
}
wake_up_q(&wake_q);
return ret;
}
@@ -402,7 +405,6 @@ int futex_unqueue_multiple(struct futex_vector *v, int count)
*/
int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
{
struct futex_hash_bucket *hb;
bool retry = false;
int ret, i;
u32 uval;
@@ -441,21 +443,25 @@ retry:
struct futex_q *q = &vs[i].q;
u32 val = vs[i].w.val;
hb = futex_hash(&q->key);
futex_q_lock(q, hb);
ret = futex_get_value_locked(&uval, uaddr);
if (1) {
struct futex_hash_bucket *hb;
if (!ret && uval == val) {
/*
* The bucket lock can't be held while dealing with the
* next futex. Queue each futex at this moment so hb can
* be unlocked.
*/
futex_queue(q, hb, current);
continue;
hb = futex_hash(&q->key);
futex_q_lock(q, hb);
ret = futex_get_value_locked(&uval, uaddr);
if (!ret && uval == val) {
/*
* The bucket lock can't be held while dealing with the
* next futex. Queue each futex at this moment so hb can
* be unlocked.
*/
futex_queue(q, hb, current);
continue;
}
futex_q_unlock(hb);
}
futex_q_unlock(hb);
__set_current_state(TASK_RUNNING);
/*
@@ -584,7 +590,6 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
struct futex_q *q, union futex_key *key2,
struct task_struct *task)
{
struct futex_hash_bucket *hb;
u32 uval;
int ret;
@@ -612,44 +617,48 @@ retry:
return ret;
retry_private:
hb = futex_hash(&q->key);
futex_q_lock(q, hb);
if (1) {
struct futex_hash_bucket *hb;
ret = futex_get_value_locked(&uval, uaddr);
hb = futex_hash(&q->key);
futex_q_lock(q, hb);
if (ret) {
futex_q_unlock(hb);
ret = futex_get_value_locked(&uval, uaddr);
ret = get_user(uval, uaddr);
if (ret)
return ret;
if (ret) {
futex_q_unlock(hb);
if (!(flags & FLAGS_SHARED))
goto retry_private;
ret = get_user(uval, uaddr);
if (ret)
return ret;
goto retry;
if (!(flags & FLAGS_SHARED))
goto retry_private;
goto retry;
}
if (uval != val) {
futex_q_unlock(hb);
return -EWOULDBLOCK;
}
if (key2 && futex_match(&q->key, key2)) {
futex_q_unlock(hb);
return -EINVAL;
}
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
* futex_queue() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
if (task == current)
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
futex_queue(q, hb, task);
}
if (uval != val) {
futex_q_unlock(hb);
return -EWOULDBLOCK;
}
if (key2 && futex_match(&q->key, key2)) {
futex_q_unlock(hb);
return -EINVAL;
}
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
* futex_queue() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
if (task == current)
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
futex_queue(q, hb, task);
return ret;
}