113 lines
3.6 KiB
Diff
113 lines
3.6 KiB
Diff
From 5e219e6228cb7b13a7d9a1d05c6e4846363fd6fe Mon Sep 17 00:00:00 2001
|
|
From: David Morley <morleyd@google.com>
|
|
Date: Fri, 14 Jul 2023 11:07:56 -0400
|
|
Subject: [PATCH 15/19] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
|
|
|
|
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
|
|
|
|
This feature indicates that the given destination network is a
|
|
low-latency ECN environment, meaning both that ECN CE marks are
|
|
applied by the network using a low-latency marking threshold and also
|
|
that TCP endpoints provide precise per-data-segment ECN feedback in
|
|
ACKs (where the ACK ECE flag echoes the received CE status of all
|
|
newly-acknowledged data segments). This feature indication can be used
|
|
by congestion control algorithms to decide how to interpret ECN
|
|
signals over the given destination network.
|
|
|
|
This feature is appropriate for datacenter-style ECN marking, such as
|
|
the ECN marking approach expected by DCTCP or BBR congestion control
|
|
modules.
|
|
|
|
Signed-off-by: David Morley <morleyd@google.com>
|
|
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
|
Signed-off-by: Yuchung Cheng <ycheng@google.com>
|
|
Tested-by: David Morley <morleyd@google.com>
|
|
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
|
|
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
|
---
|
|
include/net/tcp.h | 10 ++++++++++
|
|
include/uapi/linux/rtnetlink.h | 4 +++-
|
|
net/ipv4/tcp_minisocks.c | 2 ++
|
|
net/ipv4/tcp_output.c | 6 ++++--
|
|
4 files changed, 19 insertions(+), 3 deletions(-)
|
|
|
|
--- a/include/net/tcp.h
|
|
+++ b/include/net/tcp.h
|
|
@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode
|
|
#define TCP_ECN_QUEUE_CWR 2
|
|
#define TCP_ECN_DEMAND_CWR 4
|
|
#define TCP_ECN_SEEN 8
|
|
+#define TCP_ECN_LOW 16
|
|
|
|
enum tcp_tw_status {
|
|
TCP_TW_SUCCESS = 0,
|
|
@@ -777,6 +778,15 @@ static inline void tcp_fast_path_check(s
|
|
tcp_fast_path_on(tp);
|
|
}
|
|
|
|
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
|
|
+ const struct dst_entry *dst)
|
|
+{
|
|
+ struct tcp_sock *tp = tcp_sk(sk);
|
|
+
|
|
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
|
|
+ tp->ecn_flags |= TCP_ECN_LOW;
|
|
+}
|
|
+
|
|
u32 tcp_delack_max(const struct sock *sk);
|
|
|
|
/* Compute the actual rto_min value */
|
|
--- a/include/uapi/linux/rtnetlink.h
|
|
+++ b/include/uapi/linux/rtnetlink.h
|
|
@@ -507,12 +507,14 @@ enum {
|
|
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
|
|
#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
|
|
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
|
|
+#define RTAX_FEATURE_ECN_LOW (1 << 5)
|
|
|
|
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
|
|
RTAX_FEATURE_SACK | \
|
|
RTAX_FEATURE_TIMESTAMP | \
|
|
RTAX_FEATURE_ALLFRAG | \
|
|
- RTAX_FEATURE_TCP_USEC_TS)
|
|
+ RTAX_FEATURE_TCP_USEC_TS | \
|
|
+ RTAX_FEATURE_ECN_LOW)
|
|
|
|
struct rta_session {
|
|
__u8 proto;
|
|
--- a/net/ipv4/tcp_minisocks.c
|
|
+++ b/net/ipv4/tcp_minisocks.c
|
|
@@ -462,6 +462,8 @@ void tcp_ca_openreq_child(struct sock *s
|
|
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
|
|
bool ca_got_dst = false;
|
|
|
|
+ tcp_set_ecn_low_from_dst(sk, dst);
|
|
+
|
|
if (ca_key != TCP_CA_UNSPEC) {
|
|
const struct tcp_congestion_ops *ca;
|
|
|
|
--- a/net/ipv4/tcp_output.c
|
|
+++ b/net/ipv4/tcp_output.c
|
|
@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
|
|
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
|
|
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
|
|
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
|
|
+ const struct dst_entry *dst = __sk_dst_get(sk);
|
|
|
|
if (!use_ecn) {
|
|
- const struct dst_entry *dst = __sk_dst_get(sk);
|
|
-
|
|
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
|
|
use_ecn = true;
|
|
}
|
|
@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
|
|
tp->ecn_flags = TCP_ECN_OK;
|
|
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
|
INET_ECN_xmit(sk);
|
|
+
|
|
+ if (dst)
|
|
+ tcp_set_ecn_low_from_dst(sk, dst);
|
|
}
|
|
}
|
|
|