mirror of
https://github.com/torvalds/linux.git
synced 2026-01-24 23:16:46 +00:00
net: Extend NAPI threaded polling to allow kthread based busy polling
Add a new state NAPI_STATE_THREADED_BUSY_POLL to the NAPI state enum to enable and disable threaded busy polling. When threaded busy polling is enabled for a NAPI, enable NAPI_STATE_THREADED also. When the threaded NAPI is scheduled, set NAPI_STATE_IN_BUSY_POLL to signal napi_complete_done not to rearm interrupts. Whenever NAPI_STATE_THREADED_BUSY_POLL is unset, NAPI_STATE_IN_BUSY_POLL will be unset as well; napi_complete_done then also unsets the NAPI_STATE_SCHED_THREADED bit, which in turn makes the kthread go to sleep. Signed-off-by: Samiullah Khawaja <skhawaja@google.com> Reviewed-by: Willem de Bruijn <willemb@google.com> Acked-by: Martin Karsten <mkarsten@uwaterloo.ca> Tested-by: Martin Karsten <mkarsten@uwaterloo.ca> Link: https://patch.msgid.link/20251028203007.575686-2-skhawaja@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
committed by
Jakub Kicinski
parent
998b5d9683
commit
c18d4b190a
@@ -88,7 +88,7 @@ definitions:
|
||||
-
|
||||
name: napi-threaded
|
||||
type: enum
|
||||
entries: [disabled, enabled]
|
||||
entries: [disabled, enabled, busy-poll]
|
||||
|
||||
attribute-sets:
|
||||
-
|
||||
@@ -291,7 +291,8 @@ attribute-sets:
|
||||
name: threaded
|
||||
doc: Whether the NAPI is configured to operate in threaded polling
|
||||
mode. If this is set to enabled then the NAPI context operates
|
||||
in threaded polling mode.
|
||||
in threaded polling mode. If this is set to busy-poll, then the
|
||||
threaded polling mode also busy polls.
|
||||
type: u32
|
||||
enum: napi-threaded
|
||||
-
|
||||
|
||||
@@ -263,7 +263,9 @@ are not well known).
|
||||
Busy polling is enabled by either setting ``SO_BUSY_POLL`` on
|
||||
selected sockets or using the global ``net.core.busy_poll`` and
|
||||
``net.core.busy_read`` sysctls. An io_uring API for NAPI busy polling
|
||||
also exists.
|
||||
also exists. Threaded polling of NAPI also has a mode to busy poll for
|
||||
packets (:ref:`threaded busy polling<threaded_busy_poll>`) using the NAPI
|
||||
processing kthread.
|
||||
|
||||
epoll-based busy polling
|
||||
------------------------
|
||||
@@ -426,6 +428,52 @@ Therefore, setting ``gro_flush_timeout`` and ``napi_defer_hard_irqs`` is
|
||||
the recommended usage, because otherwise setting ``irq-suspend-timeout``
|
||||
might not have any discernible effect.
|
||||
|
||||
.. _threaded_busy_poll:
|
||||
|
||||
Threaded NAPI busy polling
|
||||
--------------------------
|
||||
|
||||
Threaded NAPI busy polling extends threaded NAPI and adds support to do
|
||||
continuous busy polling of the NAPI. This can be useful for forwarding or
|
||||
AF_XDP applications.
|
||||
|
||||
Threaded NAPI busy polling can be enabled on a per-NIC-queue basis using Netlink.
|
||||
|
||||
For example, using the following script:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ynl --family netdev --do napi-set \
|
||||
--json='{"id": 66, "threaded": "busy-poll"}'
|
||||
|
||||
The kernel will create a kthread that busy polls on this NAPI.
|
||||
|
||||
The user may elect to set the CPU affinity of this kthread to an unused CPU
|
||||
core to improve how often the NAPI is polled at the expense of wasted CPU
|
||||
cycles. Note that this will keep the CPU core busy with 100% usage.
|
||||
|
||||
Once threaded busy polling is enabled for a NAPI, the PID of the kthread can be
|
||||
retrieved using Netlink so the affinity of the kthread can be set up.
|
||||
|
||||
For example, the following script can be used to fetch the PID:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ynl --family netdev --do napi-get --json='{"id": 66}'
|
||||
|
||||
This will output something like the following, where ``258`` is the PID of the
|
||||
kthread that is polling this NAPI.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ {'defer-hard-irqs': 0,
|
||||
'gro-flush-timeout': 0,
|
||||
'id': 66,
|
||||
'ifindex': 2,
|
||||
'irq-suspend-timeout': 0,
|
||||
'pid': 258,
|
||||
'threaded': 'busy-poll'}
|
||||
|
||||
.. _threaded:
|
||||
|
||||
Threaded NAPI
|
||||
|
||||
@@ -423,11 +423,12 @@ enum {
|
||||
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
|
||||
NAPI_STATE_LISTED, /* NAPI added to system lists */
|
||||
NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
|
||||
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
|
||||
NAPI_STATE_IN_BUSY_POLL, /* Do not rearm NAPI interrupt */
|
||||
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
|
||||
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
||||
NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
|
||||
NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */
|
||||
NAPI_STATE_THREADED_BUSY_POLL, /* The threaded NAPI poller will busy poll */
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -442,6 +443,7 @@ enum {
|
||||
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
||||
NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
|
||||
NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER),
|
||||
NAPIF_STATE_THREADED_BUSY_POLL = BIT(NAPI_STATE_THREADED_BUSY_POLL),
|
||||
};
|
||||
|
||||
enum gro_result {
|
||||
|
||||
@@ -80,6 +80,7 @@ enum netdev_qstats_scope {
|
||||
enum netdev_napi_threaded {
|
||||
NETDEV_NAPI_THREADED_DISABLED,
|
||||
NETDEV_NAPI_THREADED_ENABLED,
|
||||
NETDEV_NAPI_THREADED_BUSY_POLL,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
||||
@@ -7089,7 +7089,8 @@ static void napi_stop_kthread(struct napi_struct *napi)
|
||||
*/
|
||||
if ((val & NAPIF_STATE_SCHED_THREADED) ||
|
||||
!(val & NAPIF_STATE_SCHED)) {
|
||||
new = val & (~NAPIF_STATE_THREADED);
|
||||
new = val & (~(NAPIF_STATE_THREADED |
|
||||
NAPIF_STATE_THREADED_BUSY_POLL));
|
||||
} else {
|
||||
msleep(20);
|
||||
continue;
|
||||
@@ -7113,6 +7114,16 @@ static void napi_stop_kthread(struct napi_struct *napi)
|
||||
napi->thread = NULL;
|
||||
}
|
||||
|
||||
static void napi_set_threaded_state(struct napi_struct *napi,
|
||||
enum netdev_napi_threaded threaded_mode)
|
||||
{
|
||||
bool threaded = threaded_mode != NETDEV_NAPI_THREADED_DISABLED;
|
||||
bool busy_poll = threaded_mode == NETDEV_NAPI_THREADED_BUSY_POLL;
|
||||
|
||||
assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
|
||||
assign_bit(NAPI_STATE_THREADED_BUSY_POLL, &napi->state, busy_poll);
|
||||
}
|
||||
|
||||
int napi_set_threaded(struct napi_struct *napi,
|
||||
enum netdev_napi_threaded threaded)
|
||||
{
|
||||
@@ -7139,7 +7150,7 @@ int napi_set_threaded(struct napi_struct *napi,
|
||||
} else {
|
||||
/* Make sure kthread is created before THREADED bit is set. */
|
||||
smp_mb__before_atomic();
|
||||
assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
|
||||
napi_set_threaded_state(napi, threaded);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -7531,7 +7542,9 @@ void napi_disable_locked(struct napi_struct *n)
|
||||
}
|
||||
|
||||
new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
|
||||
new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
|
||||
new &= ~(NAPIF_STATE_THREADED |
|
||||
NAPIF_STATE_THREADED_BUSY_POLL |
|
||||
NAPIF_STATE_PREFER_BUSY_POLL);
|
||||
} while (!try_cmpxchg(&n->state, &val, new));
|
||||
|
||||
hrtimer_cancel(&n->timer);
|
||||
@@ -7743,7 +7756,7 @@ static int napi_thread_wait(struct napi_struct *napi)
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void napi_threaded_poll_loop(struct napi_struct *napi)
|
||||
static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
|
||||
{
|
||||
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
|
||||
struct softnet_data *sd;
|
||||
@@ -7772,22 +7785,47 @@ static void napi_threaded_poll_loop(struct napi_struct *napi)
|
||||
}
|
||||
skb_defer_free_flush();
|
||||
bpf_net_ctx_clear(bpf_net_ctx);
|
||||
|
||||
/* When busy poll is enabled, the old packets are not flushed in
|
||||
* napi_complete_done. So flush them here.
|
||||
*/
|
||||
if (busy_poll)
|
||||
gro_flush_normal(&napi->gro, HZ >= 1000);
|
||||
local_bh_enable();
|
||||
|
||||
/* Call cond_resched here to avoid watchdog warnings. */
|
||||
if (repoll || busy_poll) {
|
||||
rcu_softirq_qs_periodic(last_qs);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (!repoll)
|
||||
break;
|
||||
|
||||
rcu_softirq_qs_periodic(last_qs);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static int napi_threaded_poll(void *data)
|
||||
{
|
||||
struct napi_struct *napi = data;
|
||||
bool want_busy_poll;
|
||||
bool in_busy_poll;
|
||||
unsigned long val;
|
||||
|
||||
while (!napi_thread_wait(napi))
|
||||
napi_threaded_poll_loop(napi);
|
||||
while (!napi_thread_wait(napi)) {
|
||||
val = READ_ONCE(napi->state);
|
||||
|
||||
want_busy_poll = val & NAPIF_STATE_THREADED_BUSY_POLL;
|
||||
in_busy_poll = val & NAPIF_STATE_IN_BUSY_POLL;
|
||||
|
||||
if (unlikely(val & NAPIF_STATE_DISABLE))
|
||||
want_busy_poll = false;
|
||||
|
||||
if (want_busy_poll != in_busy_poll)
|
||||
assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state,
|
||||
want_busy_poll);
|
||||
|
||||
napi_threaded_poll_loop(napi, want_busy_poll);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -13097,7 +13135,7 @@ static void run_backlog_napi(unsigned int cpu)
|
||||
{
|
||||
struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
|
||||
|
||||
napi_threaded_poll_loop(&sd->backlog);
|
||||
napi_threaded_poll_loop(&sd->backlog, false);
|
||||
}
|
||||
|
||||
static void backlog_napi_setup(unsigned int cpu)
|
||||
|
||||
@@ -317,6 +317,9 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n,
|
||||
|
||||
static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n)
|
||||
{
|
||||
if (test_bit(NAPI_STATE_THREADED_BUSY_POLL, &n->state))
|
||||
return NETDEV_NAPI_THREADED_BUSY_POLL;
|
||||
|
||||
if (test_bit(NAPI_STATE_THREADED, &n->state))
|
||||
return NETDEV_NAPI_THREADED_ENABLED;
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED
|
||||
[NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
|
||||
[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
|
||||
[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, },
|
||||
[NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1),
|
||||
[NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 2),
|
||||
};
|
||||
|
||||
/* NETDEV_CMD_BIND_TX - do */
|
||||
|
||||
@@ -80,6 +80,7 @@ enum netdev_qstats_scope {
|
||||
enum netdev_napi_threaded {
|
||||
NETDEV_NAPI_THREADED_DISABLED,
|
||||
NETDEV_NAPI_THREADED_ENABLED,
|
||||
NETDEV_NAPI_THREADED_BUSY_POLL,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
||||
Reference in New Issue
Block a user