mirror of
https://github.com/torvalds/linux.git
synced 2026-01-24 23:16:46 +00:00
net_sched: add qdisc_dequeue_drop() helper
Some qdisc like cake, codel, fq_codel might drop packets in their dequeue() method. This is currently problematic because dequeue() runs with the qdisc spinlock held. Freeing skbs can be extremely expensive. Add qdisc_dequeue_drop() method and a new TCQ_F_DEQUEUE_DROPS so that these qdiscs can opt-in to defer the skb frees after the socket spinlock is released. TCQ_F_DEQUEUE_DROPS is an attempt to not penalize other qdiscs with an extra cache line miss. Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://patch.msgid.link/20251121083256.674562-14-edumazet@google.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
committed by
Paolo Abeni
parent
0170d7f47c
commit
191ff13e42
@@ -114,12 +114,13 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
|
||||
|
||||
void __qdisc_run(struct Qdisc *q);
|
||||
|
||||
static inline void qdisc_run(struct Qdisc *q)
|
||||
static inline struct sk_buff *qdisc_run(struct Qdisc *q)
|
||||
{
|
||||
if (qdisc_run_begin(q)) {
|
||||
__qdisc_run(q);
|
||||
qdisc_run_end(q);
|
||||
return qdisc_run_end(q);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
|
||||
|
||||
@@ -88,6 +88,8 @@ struct Qdisc {
|
||||
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
|
||||
#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
|
||||
#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
|
||||
#define TCQ_F_DEQUEUE_DROPS 0x400 /* ->dequeue() can drop packets in q->to_free */
|
||||
|
||||
u32 limit;
|
||||
const struct Qdisc_ops *ops;
|
||||
struct qdisc_size_table __rcu *stab;
|
||||
@@ -119,6 +121,8 @@ struct Qdisc {
|
||||
|
||||
/* Note : we only change qstats.backlog in fast path. */
|
||||
struct gnet_stats_queue qstats;
|
||||
|
||||
struct sk_buff *to_free;
|
||||
__cacheline_group_end(Qdisc_write);
|
||||
|
||||
|
||||
@@ -218,8 +222,10 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void qdisc_run_end(struct Qdisc *qdisc)
|
||||
static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc)
|
||||
{
|
||||
struct sk_buff *to_free = NULL;
|
||||
|
||||
if (qdisc->flags & TCQ_F_NOLOCK) {
|
||||
spin_unlock(&qdisc->seqlock);
|
||||
|
||||
@@ -232,9 +238,16 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
|
||||
if (unlikely(test_bit(__QDISC_STATE_MISSED,
|
||||
&qdisc->state)))
|
||||
__netif_schedule(qdisc);
|
||||
} else {
|
||||
WRITE_ONCE(qdisc->running, false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) {
|
||||
to_free = qdisc->to_free;
|
||||
if (to_free)
|
||||
qdisc->to_free = NULL;
|
||||
}
|
||||
WRITE_ONCE(qdisc->running, false);
|
||||
return to_free;
|
||||
}
|
||||
|
||||
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
|
||||
@@ -1116,6 +1129,17 @@ static inline void tcf_kfree_skb_list(struct sk_buff *skb)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
|
||||
enum skb_drop_reason reason)
|
||||
{
|
||||
DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
|
||||
DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK);
|
||||
|
||||
tcf_set_drop_reason(skb, reason);
|
||||
skb->next = q->to_free;
|
||||
q->to_free = skb;
|
||||
}
|
||||
|
||||
/* Instead of calling kfree_skb() while root qdisc lock is held,
|
||||
* queue the skb for future freeing at end of __dev_xmit_skb()
|
||||
*/
|
||||
|
||||
@@ -4141,7 +4141,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
struct net_device *dev,
|
||||
struct netdev_queue *txq)
|
||||
{
|
||||
struct sk_buff *next, *to_free = NULL;
|
||||
struct sk_buff *next, *to_free = NULL, *to_free2 = NULL;
|
||||
spinlock_t *root_lock = qdisc_lock(q);
|
||||
struct llist_node *ll_list, *first_n;
|
||||
unsigned long defer_count = 0;
|
||||
@@ -4160,7 +4160,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
if (unlikely(!nolock_qdisc_is_empty(q))) {
|
||||
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
|
||||
__qdisc_run(q);
|
||||
qdisc_run_end(q);
|
||||
to_free2 = qdisc_run_end(q);
|
||||
|
||||
goto free_skbs;
|
||||
}
|
||||
@@ -4170,12 +4170,13 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
!nolock_qdisc_is_empty(q))
|
||||
__qdisc_run(q);
|
||||
|
||||
qdisc_run_end(q);
|
||||
return NET_XMIT_SUCCESS;
|
||||
to_free2 = qdisc_run_end(q);
|
||||
rc = NET_XMIT_SUCCESS;
|
||||
goto free_skbs;
|
||||
}
|
||||
|
||||
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
|
||||
qdisc_run(q);
|
||||
to_free2 = qdisc_run(q);
|
||||
goto free_skbs;
|
||||
}
|
||||
|
||||
@@ -4234,7 +4235,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
qdisc_bstats_update(q, skb);
|
||||
if (sch_direct_xmit(skb, q, dev, txq, root_lock, true))
|
||||
__qdisc_run(q);
|
||||
qdisc_run_end(q);
|
||||
to_free2 = qdisc_run_end(q);
|
||||
rc = NET_XMIT_SUCCESS;
|
||||
} else {
|
||||
int count = 0;
|
||||
@@ -4246,7 +4247,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
||||
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
|
||||
count++;
|
||||
}
|
||||
qdisc_run(q);
|
||||
to_free2 = qdisc_run(q);
|
||||
if (count != 1)
|
||||
rc = NET_XMIT_SUCCESS;
|
||||
}
|
||||
@@ -4255,6 +4256,7 @@ unlock:
|
||||
|
||||
free_skbs:
|
||||
tcf_kfree_skb_list(to_free);
|
||||
tcf_kfree_skb_list(to_free2);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -5747,8 +5749,9 @@ static __latent_entropy void net_tx_action(void)
|
||||
rcu_read_lock();
|
||||
|
||||
while (head) {
|
||||
struct Qdisc *q = head;
|
||||
spinlock_t *root_lock = NULL;
|
||||
struct sk_buff *to_free;
|
||||
struct Qdisc *q = head;
|
||||
|
||||
head = head->next_sched;
|
||||
|
||||
@@ -5775,9 +5778,10 @@ static __latent_entropy void net_tx_action(void)
|
||||
}
|
||||
|
||||
clear_bit(__QDISC_STATE_SCHED, &q->state);
|
||||
qdisc_run(q);
|
||||
to_free = qdisc_run(q);
|
||||
if (root_lock)
|
||||
spin_unlock(root_lock);
|
||||
tcf_kfree_skb_list(to_free);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
Reference in New Issue
Block a user