Skip to content

Commit 54a59ae

Browse files
borkmannkuba-moo
authored andcommitted
net, sched: Make tc-related drop reason more flexible
Currently, the kfree_skb_reason() in sch_handle_{ingress,egress}() can only express a basic SKB_DROP_REASON_TC_INGRESS or SKB_DROP_REASON_TC_EGRESS reason. Victor kicked-off an initial proposal to make this more flexible by disambiguating verdict from return code by moving the verdict into struct tcf_result and letting tcf_classify() return a negative error. If hit, then two new drop reasons were added in the proposal, that is SKB_DROP_REASON_TC_INGRESS_ERROR as well as SKB_DROP_REASON_TC_EGRESS_ERROR. Further analysis of the actual error codes would have required to attach to tcf_classify via kprobe/kretprobe to more deeply debug skb and the returned error. In order to make the kfree_skb_reason() in sch_handle_{ingress,egress}() more extensible, it can be addressed in a more straight forward way, that is: Instead of placing the verdict into struct tcf_result, we can just put the drop reason in there, which does not require changes throughout various classful schedulers given the existing verdict logic can stay as is. Then, SKB_DROP_REASON_TC_ERROR{,_*} can be added to the enum skb_drop_reason to disambiguate between an error or an intentional drop. New drop reason error codes can be added successively to the tc code base. For internal error locations which have not yet been annotated with a SKB_DROP_REASON_TC_ERROR{,_*}, the fallback is SKB_DROP_REASON_TC_INGRESS and SKB_DROP_REASON_TC_EGRESS, respectively. Generic errors could be marked with a SKB_DROP_REASON_TC_ERROR code until they are converted to more specific ones if it is found that they would be useful for troubleshooting. While drop reasons have infrastructure for subsystem specific error codes which are currently used by mac80211 and ovs, Jakub mentioned that it is preferred for tc to use the enum skb_drop_reason core codes given it is a better fit and currently the tooling support is better, too. With regards to the latter: [...] I think Alastair (bpftrace) is working on auto-prettifying enums when bpftrace outputs maps. So we can do something like: $ bpftrace -e 'tracepoint:skb:kfree_skb { @[args->reason] = count(); }' Attaching 1 probe... ^C @[SKB_DROP_REASON_TC_INGRESS]: 2 @[SKB_CONSUMED]: 34 ^^^^^^^^^^^^ names!! Auto-magically. [...] Add a small helper tcf_set_drop_reason() which can be used to set the drop reason into the tcf_result. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: Victor Nogueira <victor@mojatatu.com> Link: https://lore.kernel.org/netdev/20231006063233.74345d36@kernel.org Reviewed-by: Jakub Kicinski <kuba@kernel.org> Link: https://lore.kernel.org/r/20231009092655.22025-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent dccce1d commit 54a59ae

File tree

3 files changed

+17
-7
lines changed

3 files changed

+17
-7
lines changed

include/net/pkt_cls.h

+6
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
154154
return xchg(clp, cl);
155155
}
156156

157+
static inline void tcf_set_drop_reason(struct tcf_result *res,
158+
enum skb_drop_reason reason)
159+
{
160+
res->drop_reason = reason;
161+
}
162+
157163
static inline void
158164
__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
159165
{

include/net/sch_generic.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -324,16 +324,15 @@ struct Qdisc_ops {
324324
struct module *owner;
325325
};
326326

327-
328327
struct tcf_result {
329328
union {
330329
struct {
331330
unsigned long class;
332331
u32 classid;
333332
};
334333
const struct tcf_proto *goto_tp;
335-
336334
};
335+
enum skb_drop_reason drop_reason;
337336
};
338337

339338
struct tcf_chain;

net/core/dev.c

+10-5
Original file line numberDiff line numberDiff line change
@@ -3914,7 +3914,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
39143914
#endif /* CONFIG_NET_EGRESS */
39153915

39163916
#ifdef CONFIG_NET_XGRESS
3917-
static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
3917+
static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
3918+
enum skb_drop_reason *drop_reason)
39183919
{
39193920
int ret = TC_ACT_UNSPEC;
39203921
#ifdef CONFIG_NET_CLS_ACT
@@ -3926,12 +3927,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
39263927

39273928
tc_skb_cb(skb)->mru = 0;
39283929
tc_skb_cb(skb)->post_ct = false;
3930+
res.drop_reason = *drop_reason;
39293931

39303932
mini_qdisc_bstats_cpu_update(miniq, skb);
39313933
ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
39323934
/* Only tcf related quirks below. */
39333935
switch (ret) {
39343936
case TC_ACT_SHOT:
3937+
*drop_reason = res.drop_reason;
39353938
mini_qdisc_qstats_cpu_drop(miniq);
39363939
break;
39373940
case TC_ACT_OK:
@@ -3981,6 +3984,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
39813984
struct net_device *orig_dev, bool *another)
39823985
{
39833986
struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
3987+
enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS;
39843988
int sch_ret;
39853989

39863990
if (!entry)
@@ -3998,7 +4002,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
39984002
if (sch_ret != TC_ACT_UNSPEC)
39994003
goto ingress_verdict;
40004004
}
4001-
sch_ret = tc_run(tcx_entry(entry), skb);
4005+
sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
40024006
ingress_verdict:
40034007
switch (sch_ret) {
40044008
case TC_ACT_REDIRECT:
@@ -4015,7 +4019,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
40154019
*ret = NET_RX_SUCCESS;
40164020
return NULL;
40174021
case TC_ACT_SHOT:
4018-
kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
4022+
kfree_skb_reason(skb, drop_reason);
40194023
*ret = NET_RX_DROP;
40204024
return NULL;
40214025
/* used by tc_run */
@@ -4036,6 +4040,7 @@ static __always_inline struct sk_buff *
40364040
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
40374041
{
40384042
struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
4043+
enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS;
40394044
int sch_ret;
40404045

40414046
if (!entry)
@@ -4049,7 +4054,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
40494054
if (sch_ret != TC_ACT_UNSPEC)
40504055
goto egress_verdict;
40514056
}
4052-
sch_ret = tc_run(tcx_entry(entry), skb);
4057+
sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
40534058
egress_verdict:
40544059
switch (sch_ret) {
40554060
case TC_ACT_REDIRECT:
@@ -4058,7 +4063,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
40584063
*ret = NET_XMIT_SUCCESS;
40594064
return NULL;
40604065
case TC_ACT_SHOT:
4061-
kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
4066+
kfree_skb_reason(skb, drop_reason);
40624067
*ret = NET_XMIT_DROP;
40634068
return NULL;
40644069
/* used by tc_run */

0 commit comments

Comments
 (0)