From 5e219e6228cb7b13a7d9a1d05c6e4846363fd6fe Mon Sep 17 00:00:00 2001 From: David Morley Date: Fri, 14 Jul 2023 11:07:56 -0400 Subject: [PATCH 15/19] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW. This feature indicates that the given destination network is a low-latency ECN environment, meaning both that ECN CE marks are applied by the network using a low-latency marking threshold and also that TCP endpoints provide precise per-data-segment ECN feedback in ACKs (where the ACK ECE flag echoes the received CE status of all newly-acknowledged data segments). This feature indication can be used by congestion control algorithms to decide how to interpret ECN signals over the given destination network. This feature is appropriate for datacenter-style ECN marking, such as the ECN marking approach expected by DCTCP or BBR congestion control modules. Signed-off-by: David Morley Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Tested-by: David Morley Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3 Signed-off-by: Alexandre Frade --- include/net/tcp.h | 10 ++++++++++ include/uapi/linux/rtnetlink.h | 4 +++- net/ipv4/tcp_minisocks.c | 2 ++ net/ipv4/tcp_output.c | 6 ++++-- 4 files changed, 19 insertions(+), 3 deletions(-) --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 +#define TCP_ECN_LOW 16 enum tcp_tw_status { TCP_TW_SUCCESS = 0, @@ -777,6 +778,15 @@ static inline void tcp_fast_path_check(s tcp_fast_path_on(tp); } +static inline void tcp_set_ecn_low_from_dst(struct sock *sk, + const struct dst_entry *dst) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (dst_feature(dst, RTAX_FEATURE_ECN_LOW)) + tp->ecn_flags |= TCP_ECN_LOW; +} + u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -507,12 +507,14 @@ enum { #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ #define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) +#define RTAX_FEATURE_ECN_LOW (1 << 5) #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ RTAX_FEATURE_SACK | \ RTAX_FEATURE_TIMESTAMP | \ RTAX_FEATURE_ALLFRAG | \ - RTAX_FEATURE_TCP_USEC_TS) + RTAX_FEATURE_TCP_USEC_TS | \ + RTAX_FEATURE_ECN_LOW) struct rta_session { __u8 proto; --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -462,6 +462,8 @@ void tcp_ca_openreq_child(struct sock *s u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; + tcp_set_ecn_low_from_dst(sk, dst); + if (ca_key != TCP_CA_UNSPEC) { const struct tcp_congestion_ops *ca; --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + const struct dst_entry *dst = __sk_dst_get(sk); if (!use_ecn) { - const struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) use_ecn = true; } @@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock tp->ecn_flags = TCP_ECN_OK; if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); + + if (dst) + tcp_set_ecn_low_from_dst(sk, dst); } }