add 3rd party/custom patches
3rd-party patches (in alphabetical order): - bbr3 - ntsync5 - openwrt - pf-kernel - xanmod - zen; no configuration changes for now
This commit is contained in:
52
debian/patches/misc-bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch
vendored
Normal file
52
debian/patches/misc-bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
From ce1cd7869a208112a8728d1fe9e373f78a2e4a6e Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Tue, 11 Jun 2019 12:26:55 -0400
|
||||
Subject: [PATCH 01/19] net-tcp_bbr: broaden app-limited rate sample detection
|
||||
|
||||
This commit is a bug fix for the Linux TCP app-limited
|
||||
(application-limited) logic that is used for collecting rate
|
||||
(bandwidth) samples.
|
||||
|
||||
Previously the app-limited logic only looked for "bubbles" of
|
||||
silence in between application writes, by checking at the start
|
||||
of each sendmsg. But "bubbles" of silence can also happen before
|
||||
retransmits: e.g. bubbles can happen between an application write
|
||||
and a retransmit, or between two retransmits.
|
||||
|
||||
Retransmits are triggered by ACKs or timers. So this commit checks
|
||||
for bubbles of app-limited silence upon ACKs or timers.
|
||||
|
||||
Why does this commit check for app-limited state at the start of
|
||||
ACKs and timer handling? Because at that point we know whether
|
||||
inflight was fully using the cwnd. During processing the ACK or
|
||||
timer event we often change the cwnd; after changing the cwnd we
|
||||
can't know whether inflight was fully using the old cwnd.
|
||||
|
||||
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
|
||||
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
net/ipv4/tcp_timer.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3961,6 +3961,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
|
||||
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
|
||||
rs.prior_in_flight = tcp_packets_in_flight(tp);
|
||||
+ tcp_rate_check_app_limited(sk);
|
||||
|
||||
/* ts_recent update must be made after we are sure that the packet
|
||||
* is in window.
|
||||
--- a/net/ipv4/tcp_timer.c
|
||||
+++ b/net/ipv4/tcp_timer.c
|
||||
@@ -689,6 +689,7 @@ void tcp_write_timer_handler(struct sock
|
||||
return;
|
||||
}
|
||||
|
||||
+ tcp_rate_check_app_limited(sk);
|
||||
tcp_mstamp_refresh(tcp_sk(sk));
|
||||
event = icsk->icsk_pending;
|
||||
|
74
debian/patches/misc-bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch
vendored
Normal file
74
debian/patches/misc-bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
From b32715fbe2ab96d1060ec37bb9c03feedf366494 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 24 Jun 2018 21:55:59 -0400
|
||||
Subject: [PATCH 02/19] net-tcp_bbr: v2: shrink delivered_mstamp,
|
||||
first_tx_mstamp to u32 to free up 8 bytes
|
||||
|
||||
Free up some space for tracking inflight and losses for each
|
||||
bw sample, in upcoming commits.
|
||||
|
||||
These timestamps are in microseconds, and are now stored in 32
|
||||
bits. So they can only hold time intervals up to roughly 2^12 = 4096
|
||||
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit
|
||||
microsecond implementation approach and resulting deployment
|
||||
limitations. So this is not introducing a new limit. And these should
|
||||
not be a limitation for the foreseeable future.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55
|
||||
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 9 +++++++--
|
||||
net/ipv4/tcp_rate.c | 7 ++++---
|
||||
2 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -884,6 +884,11 @@ static inline u32 tcp_stamp_us_delta(u64
|
||||
return max_t(s64, t1 - t0, 0);
|
||||
}
|
||||
|
||||
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
|
||||
+{
|
||||
+ return max_t(s32, t1 - t0, 0);
|
||||
+}
|
||||
+
|
||||
/* provide the departure time in us unit */
|
||||
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
|
||||
{
|
||||
@@ -973,9 +978,9 @@ struct tcp_skb_cb {
|
||||
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
|
||||
__u32 delivered;
|
||||
/* start of send pipeline phase */
|
||||
- u64 first_tx_mstamp;
|
||||
+ u32 first_tx_mstamp;
|
||||
/* when we reached the "delivered" count */
|
||||
- u64 delivered_mstamp;
|
||||
+ u32 delivered_mstamp;
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock
|
||||
/* Record send time of most recently ACKed packet: */
|
||||
tp->first_tx_mstamp = tx_tstamp;
|
||||
/* Find the duration of the "send phase" of this window: */
|
||||
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
|
||||
- scb->tx.first_tx_mstamp);
|
||||
+ rs->interval_us = tcp_stamp32_us_delta(
|
||||
+ tp->first_tx_mstamp,
|
||||
+ scb->tx.first_tx_mstamp);
|
||||
|
||||
}
|
||||
/* Mark off the skb delivered once it's sacked to avoid being
|
||||
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
|
||||
* longer phase.
|
||||
*/
|
||||
snd_us = rs->interval_us; /* send phase */
|
||||
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
|
||||
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
|
||||
rs->prior_mstamp); /* ack phase */
|
||||
rs->interval_us = max(snd_us, ack_us);
|
||||
|
109
debian/patches/misc-bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch
vendored
Normal file
109
debian/patches/misc-bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
From 25856231832186fe13189b986cc0e91860c18201 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sat, 5 Aug 2017 11:49:50 -0400
|
||||
Subject: [PATCH 03/19] net-tcp_bbr: v2: snapshot packets in flight at transmit
|
||||
time and pass in rate_sample
|
||||
|
||||
CC algorithms may want to snapshot the number of packets in flight at
|
||||
transmit time and pass in rate_sample, to understand the relationship
|
||||
between inflight and losses or ECN signals, to try to find the highest
|
||||
inflight value that has acceptable levels of loss/ECN marking.
|
||||
|
||||
We split out the code to set an skb's tx.in_flight field into its own
|
||||
function, so that this code can be used for the TCP_REPAIR "fake send"
|
||||
code path that inserts skbs into the rtx queue without sending them.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea
|
||||
Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b
|
||||
Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63
|
||||
Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 6 ++++++
|
||||
net/ipv4/tcp_output.c | 1 +
|
||||
net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++
|
||||
3 files changed, 27 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -981,6 +981,10 @@ struct tcp_skb_cb {
|
||||
u32 first_tx_mstamp;
|
||||
/* when we reached the "delivered" count */
|
||||
u32 delivered_mstamp;
|
||||
+#define TCPCB_IN_FLIGHT_BITS 20
|
||||
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
|
||||
+ u32 in_flight:20, /* packets in flight at transmit */
|
||||
+ unused2:12;
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
@@ -1136,6 +1140,7 @@ struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
+ u32 tx_in_flight; /* packets in flight at starting timestamp */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
@@ -1258,6 +1263,7 @@ static inline void tcp_ca_event(struct s
|
||||
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
|
||||
|
||||
/* From tcp_rate.c */
|
||||
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
|
||||
struct rate_sample *rs);
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -2765,6 +2765,7 @@ static bool tcp_write_xmit(struct sock *
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
|
||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||
tcp_init_tso_segs(skb, mss_now);
|
||||
+ tcp_set_tx_in_flight(sk, skb);
|
||||
goto repair; /* Skip network transmission */
|
||||
}
|
||||
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -34,6 +34,24 @@
|
||||
* ready to send in the write queue.
|
||||
*/
|
||||
|
||||
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
|
||||
+{
|
||||
+ struct tcp_sock *tp = tcp_sk(sk);
|
||||
+ u32 in_flight;
|
||||
+
|
||||
+ /* Check, sanitize, and record packets in flight after skb was sent. */
|
||||
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
|
||||
+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
|
||||
+ "insane in_flight %u cc %s mss %u "
|
||||
+ "cwnd %u pif %u %u %u %u\n",
|
||||
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
|
||||
+ tp->mss_cache, tp->snd_cwnd,
|
||||
+ tp->packets_out, tp->retrans_out,
|
||||
+ tp->sacked_out, tp->lost_out))
|
||||
+ in_flight = TCPCB_IN_FLIGHT_MAX;
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
|
||||
+}
|
||||
+
|
||||
/* Snapshot the current delivery information in the skb, to generate
|
||||
* a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
|
||||
*/
|
||||
@@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk,
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
+ tcp_set_tx_in_flight(sk, skb);
|
||||
}
|
||||
|
||||
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
|
||||
@@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
rs->is_app_limited = scb->tx.is_app_limited;
|
||||
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
|
||||
+ rs->tx_in_flight = scb->tx.in_flight;
|
||||
rs->last_end_seq = scb->end_seq;
|
||||
|
||||
/* Record send time of most recently ACKed packet: */
|
70
debian/patches/misc-bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch
vendored
Normal file
70
debian/patches/misc-bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
From b1772710e8b5b98c09e96d4f1af620cd938fddf7 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Thu, 12 Oct 2017 23:44:27 -0400
|
||||
Subject: [PATCH 04/19] net-tcp_bbr: v2: count packets lost over TCP rate
|
||||
sampling interval
|
||||
|
||||
For understanding the relationship between inflight and packet loss
|
||||
signals, to try to find the highest inflight value that has acceptable
|
||||
levels of packet losses.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab
|
||||
Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 5 ++++-
|
||||
net/ipv4/tcp_rate.c | 3 +++
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -985,6 +985,7 @@ struct tcp_skb_cb {
|
||||
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
|
||||
u32 in_flight:20, /* packets in flight at transmit */
|
||||
unused2:12;
|
||||
+ u32 lost; /* packets lost so far upon tx of skb */
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
@@ -1138,11 +1139,13 @@ struct ack_sample {
|
||||
*/
|
||||
struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
u32 tx_in_flight; /* packets in flight at starting timestamp */
|
||||
+ s32 lost; /* number of packets lost over interval */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
u32 snd_interval_us; /* snd interval for delivered packets */
|
||||
u32 rcv_interval_us; /* rcv interval for delivered packets */
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk,
|
||||
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
tcp_set_tx_in_flight(sk, skb);
|
||||
}
|
||||
@@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock
|
||||
if (!rs->prior_delivered ||
|
||||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
|
||||
scb->end_seq, rs->last_end_seq)) {
|
||||
+ rs->prior_lost = scb->tx.lost;
|
||||
rs->prior_delivered_ce = scb->tx.delivered_ce;
|
||||
rs->prior_delivered = scb->tx.delivered;
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
@@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
|
||||
return;
|
||||
}
|
||||
rs->delivered = tp->delivered - rs->prior_delivered;
|
||||
+ rs->lost = tp->lost - rs->prior_lost;
|
||||
|
||||
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
|
||||
/* delivered_ce occupies less than 32 bits in the skb control block */
|
38
debian/patches/misc-bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch
vendored
Normal file
38
debian/patches/misc-bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
From fdf01142aea8645186e080f1278d3b5a5fd8c66c Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Mon, 19 Nov 2018 13:48:36 -0500
|
||||
Subject: [PATCH 05/19] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
|
||||
|
||||
For understanding the relationship between inflight and ECN signals,
|
||||
to try to find the highest inflight value that has acceptable levels
|
||||
of ECN marking.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
|
||||
Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1157,6 +1157,7 @@ struct rate_sample {
|
||||
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
||||
bool is_retrans; /* is sample from retransmission? */
|
||||
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
||||
+ bool is_ece; /* did this ACK have ECN marked? */
|
||||
};
|
||||
|
||||
struct tcp_congestion_ops {
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -4060,6 +4060,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
delivered = tcp_newly_delivered(sk, delivered, flag);
|
||||
lost = tp->lost - lost; /* freshly marked lost */
|
||||
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
|
||||
+ rs.is_ece = !!(flag & FLAG_ECE);
|
||||
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
|
||||
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
|
||||
tcp_xmit_recovery(sk, rexmit);
|
57
debian/patches/misc-bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch
vendored
Normal file
57
debian/patches/misc-bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
From a3e88432c2ebf12de9c2053a13417ddf2ad4cb4e Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Tue, 7 Aug 2018 21:52:06 -0400
|
||||
Subject: [PATCH 06/19] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
|
||||
module callback API
|
||||
|
||||
For connections experiencing reordering, RACK can mark packets lost
|
||||
long after we receive the SACKs/ACKs hinting that the packets were
|
||||
actually lost.
|
||||
|
||||
This means that CC modules cannot easily learn the volume of inflight
|
||||
data at which packet loss happens by looking at the current inflight
|
||||
or even the packets in flight when the most recently SACKed packet was
|
||||
sent. To learn this, CC modules need to know how many packets were in
|
||||
flight at the time lost packets were sent. This new callback, combined
|
||||
with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
|
||||
|
||||
This also provides a consistent callback that is invoked whether
|
||||
packets are marked lost upon ACK processing, using the RACK reordering
|
||||
timer, or at RTO time.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
|
||||
Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 3 +++
|
||||
net/ipv4/tcp_input.c | 5 +++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1184,6 +1184,9 @@ struct tcp_congestion_ops {
|
||||
/* override sysctl_tcp_min_tso_segs */
|
||||
u32 (*min_tso_segs)(struct sock *sk);
|
||||
|
||||
+ /* react to a specific lost skb (optional) */
|
||||
+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
|
||||
+
|
||||
/* call when packets are delivered to update cwnd and pacing rate,
|
||||
* after all the ca_state processing. (optional)
|
||||
*/
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -1120,7 +1120,12 @@ static void tcp_verify_retransmit_hint(s
|
||||
*/
|
||||
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
|
||||
{
|
||||
+ struct sock *sk = (struct sock *)tp;
|
||||
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||
+
|
||||
tp->lost += tcp_skb_pcount(skb);
|
||||
+ if (ca_ops->skb_marked_lost)
|
||||
+ ca_ops->skb_marked_lost(sk, skb);
|
||||
}
|
||||
|
||||
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
|
59
debian/patches/misc-bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch
vendored
Normal file
59
debian/patches/misc-bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
From af7d33e71649b8e2ae00dccf336720a8ab891606 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Wed, 1 May 2019 20:16:33 -0400
|
||||
Subject: [PATCH 07/19] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
|
||||
tcp_shifted_skb()
|
||||
|
||||
When tcp_shifted_skb() updates state as adjacent SACKed skbs are
|
||||
coalesced, previously the tx.in_flight was not adjusted, so we could
|
||||
get contradictory state where the skb's recorded pcount was bigger
|
||||
than the tx.in_flight (the number of segments that were in_flight
|
||||
after sending the skb).
|
||||
|
||||
Normally having a SACKed skb with contradictory pcount/tx.in_flight
|
||||
would not matter. However, with SACK reneging, the SACKed bit is
|
||||
removed, and an skb once again becomes eligible for retransmitting,
|
||||
fragmenting, SACKing, etc. Packetdrill testing verified the following
|
||||
sequence is possible in a kernel that does not have this commit:
|
||||
|
||||
- skb N is SACKed
|
||||
- skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
|
||||
- tcp_shifted_skb() will increase the pcount of prev,
|
||||
but leave tx.in_flight as-is
|
||||
- so prev skb can have pcount > tx.in_flight
|
||||
- RTO, tcp_timeout_mark_lost(), detect reneg,
|
||||
remove "SACKed" bit, mark skb N as lost
|
||||
- find pcount of skb N is greater than its tx.in_flight
|
||||
|
||||
I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
|
||||
WARN_ON_ONCE(inflight_prev < 0)
|
||||
to fire in production machines using bbr2.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
|
||||
Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -1506,6 +1506,17 @@ static bool tcp_shifted_skb(struct sock
|
||||
WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
|
||||
tcp_skb_pcount_add(skb, -pcount);
|
||||
|
||||
+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
|
||||
+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
|
||||
+ "prev in_flight: %u skb in_flight: %u pcount: %u",
|
||||
+ TCP_SKB_CB(prev)->tx.in_flight,
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight,
|
||||
+ pcount))
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = 0;
|
||||
+ else
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
|
||||
+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
|
||||
+
|
||||
/* When we're adding to gso_segs == 1, gso_size will be zero,
|
||||
* in theory this shouldn't be necessary but as long as DSACK
|
||||
* code can come after this skb later on it's better to keep
|
97
debian/patches/misc-bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch
vendored
Normal file
97
debian/patches/misc-bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
From a4d44bce49f61f8755f558dc40edff5f8958b7c6 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Wed, 1 May 2019 20:16:25 -0400
|
||||
Subject: [PATCH 08/19] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in
|
||||
tcp_fragment()
|
||||
|
||||
When we fragment an skb that has already been sent, we need to update
|
||||
the tx.in_flight for the first skb in the resulting pair ("buff").
|
||||
|
||||
Because we were not updating the tx.in_flight, the tx.in_flight value
|
||||
was inconsistent with the pcount of the "buff" skb (tx.in_flight would
|
||||
be too high). That meant that if the "buff" skb was lost, then
|
||||
bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value
|
||||
that is too high. This could result in longer queues and higher packet
|
||||
loss.
|
||||
|
||||
Packetdrill testing verified that without this commit, when the second
|
||||
half of an skb is SACKed and then later the first half of that skb is
|
||||
marked lost, the calculated inflight_hi was incorrect.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874
|
||||
Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup]
|
||||
Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings
|
||||
Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 15 +++++++++++++++
|
||||
net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++-
|
||||
2 files changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1283,6 +1283,21 @@ static inline bool tcp_skb_sent_after(u6
|
||||
return t1 > t2 || (t1 == t2 && after(seq1, seq2));
|
||||
}
|
||||
|
||||
+/* If a retransmit failed due to local qdisc congestion or other local issues,
|
||||
+ * then we may have called tcp_set_skb_tso_segs() to increase the number of
|
||||
+ * segments in the skb without increasing the tx.in_flight. In all other cases,
|
||||
+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We
|
||||
+ * do not have the state to know whether a retransmit failed due to local qdisc
|
||||
+ * congestion or other local issues, so to avoid spurious warnings we consider
|
||||
+ * that any skb marked lost may have suffered that fate.
|
||||
+ */
|
||||
+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
|
||||
+ u32 skb_sacked_flags,
|
||||
+ u32 tx_in_flight)
|
||||
+{
|
||||
+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
|
||||
+}
|
||||
+
|
||||
/* These functions determine how the current flow behaves in respect of SACK
|
||||
* handling. SACK is negotiated with the peer, and therefore it can vary
|
||||
* between different flows.
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -1601,7 +1601,7 @@ int tcp_fragment(struct sock *sk, enum t
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct sk_buff *buff;
|
||||
- int old_factor;
|
||||
+ int old_factor, inflight_prev;
|
||||
long limit;
|
||||
int nlen;
|
||||
u8 flags;
|
||||
@@ -1676,6 +1676,30 @@ int tcp_fragment(struct sock *sk, enum t
|
||||
|
||||
if (diff)
|
||||
tcp_adjust_pcount(sk, skb, diff);
|
||||
+
|
||||
+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
|
||||
+ if (inflight_prev < 0) {
|
||||
+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
|
||||
+ old_factor,
|
||||
+ TCP_SKB_CB(skb)->sacked,
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight),
|
||||
+ "inconsistent: tx.in_flight: %u "
|
||||
+ "old_factor: %d mss: %u sacked: %u "
|
||||
+ "1st pcount: %d 2nd pcount: %d "
|
||||
+ "1st len: %u 2nd len: %u ",
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight, old_factor,
|
||||
+ mss_now, TCP_SKB_CB(skb)->sacked,
|
||||
+ tcp_skb_pcount(skb), tcp_skb_pcount(buff),
|
||||
+ skb->len, buff->len);
|
||||
+ inflight_prev = 0;
|
||||
+ }
|
||||
+ /* Set 1st tx.in_flight as if 1st were sent by itself: */
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
|
||||
+ tcp_skb_pcount(skb);
|
||||
+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
|
||||
+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
|
||||
+ tcp_skb_pcount(skb) +
|
||||
+ tcp_skb_pcount(buff);
|
||||
}
|
||||
|
||||
/* Link BUFF into the send queue. */
|
73
debian/patches/misc-bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch
vendored
Normal file
73
debian/patches/misc-bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
From 65cca0e8fd954a150ec874650af47f7800ea3049 Mon Sep 17 00:00:00 2001
|
||||
From: Yousuk Seung <ysseung@google.com>
|
||||
Date: Wed, 23 May 2018 17:55:54 -0700
|
||||
Subject: [PATCH 09/19] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
|
||||
|
||||
Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
|
||||
congestion control module to receive CE events.
|
||||
|
||||
Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
|
||||
bit in opts flag to receive CE events but this may incur changes in ECN
|
||||
behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
|
||||
that allows congestion control modules to receive CE events
|
||||
independently of TCP_CONG_NEEDS_ECN.
|
||||
|
||||
Effort: net-tcp
|
||||
Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
|
||||
Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 14 +++++++++++++-
|
||||
net/ipv4/tcp_input.c | 4 ++--
|
||||
2 files changed, 15 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1119,7 +1119,11 @@ enum tcp_ca_ack_event_flags {
|
||||
#define TCP_CONG_NON_RESTRICTED 0x1
|
||||
/* Requires ECN/ECT set on all packets */
|
||||
#define TCP_CONG_NEEDS_ECN 0x2
|
||||
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
|
||||
+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
|
||||
+#define TCP_CONG_WANTS_CE_EVENTS 0x4
|
||||
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
|
||||
+ TCP_CONG_NEEDS_ECN | \
|
||||
+ TCP_CONG_WANTS_CE_EVENTS)
|
||||
|
||||
union tcp_cc_info;
|
||||
|
||||
@@ -1251,6 +1255,14 @@ static inline char *tcp_ca_get_name_by_k
|
||||
}
|
||||
#endif
|
||||
|
||||
+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
|
||||
+{
|
||||
+ const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
+
|
||||
+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
|
||||
+ TCP_CONG_WANTS_CE_EVENTS);
|
||||
+}
|
||||
+
|
||||
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
|
||||
{
|
||||
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -370,7 +370,7 @@ static void __tcp_ecn_check_ce(struct so
|
||||
tcp_enter_quickack_mode(sk, 2);
|
||||
break;
|
||||
case INET_ECN_CE:
|
||||
- if (tcp_ca_needs_ecn(sk))
|
||||
+ if (tcp_ca_wants_ce_events(sk))
|
||||
tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
|
||||
|
||||
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
|
||||
@@ -381,7 +381,7 @@ static void __tcp_ecn_check_ce(struct so
|
||||
tp->ecn_flags |= TCP_ECN_SEEN;
|
||||
break;
|
||||
default:
|
||||
- if (tcp_ca_needs_ecn(sk))
|
||||
+ if (tcp_ca_wants_ce_events(sk))
|
||||
tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
|
||||
tp->ecn_flags |= TCP_ECN_SEEN;
|
||||
break;
|
118
debian/patches/misc-bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch
vendored
Normal file
118
debian/patches/misc-bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
From 3acb852e1cfcdeea388bd428c6dd81609fd40792 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Fri, 27 Sep 2019 17:10:26 -0400
|
||||
Subject: [PATCH 10/19] net-tcp: re-generalize TSO sizing in TCP CC module API
|
||||
|
||||
Reorganize the API for CC modules so that the CC module once again
|
||||
gets complete control of the TSO sizing decision. This is how the API
|
||||
was set up around 2016 and the initial BBRv1 upstreaming. Later Eric
|
||||
Dumazet simplified it. But with wider testing it now seems that to
|
||||
avoid CPU regressions BBR needs to have a different TSO sizing
|
||||
function.
|
||||
|
||||
This is necessary to handle cases where there are many flows
|
||||
bottlenecked on the sender host's NIC, in which case BBR's pacing rate
|
||||
is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because
|
||||
BBR's pacing rate adapts to the low bandwidth share each flow sees. By
|
||||
contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very
|
||||
large cwnd, and thus large pacing rate and large TSO burst size.
|
||||
|
||||
Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 4 ++--
|
||||
net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++-----------
|
||||
net/ipv4/tcp_output.c | 11 +++++------
|
||||
3 files changed, 33 insertions(+), 19 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1185,8 +1185,8 @@ struct tcp_congestion_ops {
|
||||
/* hook for packet ack accounting (optional) */
|
||||
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
|
||||
|
||||
- /* override sysctl_tcp_min_tso_segs */
|
||||
- u32 (*min_tso_segs)(struct sock *sk);
|
||||
+ /* pick target number of segments per TSO/GSO skb (optional): */
|
||||
+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
|
||||
|
||||
/* react to a specific lost skb (optional) */
|
||||
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
|
||||
--- a/net/ipv4/tcp_bbr.c
|
||||
+++ b/net/ipv4/tcp_bbr.c
|
||||
@@ -301,20 +301,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs(
|
||||
return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
|
||||
}
|
||||
|
||||
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
|
||||
+ * a particular max gso size as a constraint.
|
||||
+ */
|
||||
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
|
||||
+ u32 gso_max_size)
|
||||
+{
|
||||
+ u32 segs;
|
||||
+ u64 bytes;
|
||||
+
|
||||
+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
|
||||
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
|
||||
+
|
||||
+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
|
||||
+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
|
||||
+ return segs;
|
||||
+}
|
||||
+
|
||||
+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
|
||||
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
+{
|
||||
+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
|
||||
+}
|
||||
+
|
||||
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
|
||||
static u32 bbr_tso_segs_goal(struct sock *sk)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
- u32 segs, bytes;
|
||||
-
|
||||
- /* Sort of tcp_tso_autosize() but ignoring
|
||||
- * driver provided sk_gso_max_size.
|
||||
- */
|
||||
- bytes = min_t(unsigned long,
|
||||
- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
|
||||
- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
|
||||
- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
|
||||
|
||||
- return min(segs, 0x7FU);
|
||||
+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE);
|
||||
}
|
||||
|
||||
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
|
||||
@@ -1150,7 +1165,7 @@ static struct tcp_congestion_ops tcp_bbr
|
||||
.undo_cwnd = bbr_undo_cwnd,
|
||||
.cwnd_event = bbr_cwnd_event,
|
||||
.ssthresh = bbr_ssthresh,
|
||||
- .min_tso_segs = bbr_min_tso_segs,
|
||||
+ .tso_segs = bbr_tso_segs,
|
||||
.get_info = bbr_get_info,
|
||||
.set_state = bbr_set_state,
|
||||
};
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -2057,13 +2057,12 @@ static u32 tcp_tso_autosize(const struct
|
||||
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
{
|
||||
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||
- u32 min_tso, tso_segs;
|
||||
+ u32 tso_segs;
|
||||
|
||||
- min_tso = ca_ops->min_tso_segs ?
|
||||
- ca_ops->min_tso_segs(sk) :
|
||||
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
-
|
||||
- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
|
||||
+ tso_segs = ca_ops->tso_segs ?
|
||||
+ ca_ops->tso_segs(sk, mss_now) :
|
||||
+ tcp_tso_autosize(sk, mss_now,
|
||||
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
|
||||
}
|
||||
|
72
debian/patches/misc-bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch
vendored
Normal file
72
debian/patches/misc-bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
From 3741ada76bab5111cbb9c279cf27e67a0334eb05 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 7 Jan 2024 21:11:26 -0300
|
||||
Subject: [PATCH 11/19] net-tcp: add fast_ack_mode=1: skip rwin check in
|
||||
__tcp_ack_snd_check()
|
||||
|
||||
Add logic for an experimental TCP connection behavior, enabled with
|
||||
tp->fast_ack_mode = 1, which disables checking the receive window
|
||||
before sending an ack in __tcp_ack_snd_check(). If this behavior is
|
||||
enabled, the data receiver sends an ACK if the amount of data is >
|
||||
RCV.MSS.
|
||||
|
||||
Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/tcp.h | 3 ++-
|
||||
net/ipv4/tcp.c | 1 +
|
||||
net/ipv4/tcp_cong.c | 1 +
|
||||
net/ipv4/tcp_input.c | 5 +++--
|
||||
4 files changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/tcp.h
|
||||
+++ b/include/linux/tcp.h
|
||||
@@ -369,7 +369,8 @@ struct tcp_sock {
|
||||
u8 compressed_ack;
|
||||
u8 dup_ack_counter:2,
|
||||
tlp_retrans:1, /* TLP is a retransmission */
|
||||
- unused:5;
|
||||
+ fast_ack_mode:2, /* which fast ack mode ? */
|
||||
+ unused:3;
|
||||
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
|
||||
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
|
||||
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -3123,6 +3123,7 @@ int tcp_disconnect(struct sock *sk, int
|
||||
tp->rx_opt.dsack = 0;
|
||||
tp->rx_opt.num_sacks = 0;
|
||||
tp->rcv_ooopack = 0;
|
||||
+ tp->fast_ack_mode = 0;
|
||||
|
||||
|
||||
/* Clean up fastopen related fields */
|
||||
--- a/net/ipv4/tcp_cong.c
|
||||
+++ b/net/ipv4/tcp_cong.c
|
||||
@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
tcp_sk(sk)->prior_ssthresh = 0;
|
||||
+ tcp_sk(sk)->fast_ack_mode = 0;
|
||||
if (icsk->icsk_ca_ops->init)
|
||||
icsk->icsk_ca_ops->init(sk);
|
||||
if (tcp_ca_needs_ecn(sk))
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -5763,13 +5763,14 @@ static void __tcp_ack_snd_check(struct s
|
||||
|
||||
/* More than one full frame received... */
|
||||
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
|
||||
+ (tp->fast_ack_mode == 1 ||
|
||||
/* ... and right edge of window advances far enough.
|
||||
* (tcp_recvmsg() will send ACK otherwise).
|
||||
* If application uses SO_RCVLOWAT, we want send ack now if
|
||||
* we have not received enough bytes to satisfy the condition.
|
||||
*/
|
||||
- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
|
||||
- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
|
||||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
|
||||
+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
|
||||
/* We ACK each frame or... */
|
||||
tcp_in_quickack_mode(sk) ||
|
||||
/* Protocol state mandates a one-time immediate ACK */
|
45
debian/patches/misc-bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch
vendored
Normal file
45
debian/patches/misc-bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
From e5d35b7c882b7001f8a31b14c9f08917230dedc3 Mon Sep 17 00:00:00 2001
|
||||
From: Jianfeng Wang <jfwang@google.com>
|
||||
Date: Fri, 19 Jun 2020 17:33:45 +0000
|
||||
Subject: [PATCH 12/19] net-tcp_bbr: v2: record app-limited status of
|
||||
TLP-repaired flight
|
||||
|
||||
When sending a TLP retransmit, record whether the outstanding flight
|
||||
of data is application limited. This is important for congestion
|
||||
control modules that want to respond to losses repaired by TLP
|
||||
retransmits. This is important because the following scenarios convey
|
||||
very different information:
|
||||
(1) a packet loss with a small number of packets in flight;
|
||||
(2) a packet loss with the maximum amount of data in flight allowed
|
||||
by the CC module.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/tcp.h | 3 ++-
|
||||
net/ipv4/tcp_output.c | 1 +
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/tcp.h
|
||||
+++ b/include/linux/tcp.h
|
||||
@@ -370,7 +370,8 @@ struct tcp_sock {
|
||||
u8 dup_ack_counter:2,
|
||||
tlp_retrans:1, /* TLP is a retransmission */
|
||||
fast_ack_mode:2, /* which fast ack mode ? */
|
||||
- unused:3;
|
||||
+ tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */
|
||||
+ unused:2;
|
||||
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
|
||||
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
|
||||
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -3003,6 +3003,7 @@ void tcp_send_loss_probe(struct sock *sk
|
||||
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
|
||||
goto rearm_timer;
|
||||
|
||||
+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
|
||||
if (__tcp_retransmit_skb(sk, skb, 1))
|
||||
goto rearm_timer;
|
||||
|
45
debian/patches/misc-bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch
vendored
Normal file
45
debian/patches/misc-bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
From 77e7c22b63f8934206b1e89c173558c3967f0779 Mon Sep 17 00:00:00 2001
|
||||
From: Jianfeng Wang <jfwang@google.com>
|
||||
Date: Tue, 16 Jun 2020 17:41:19 +0000
|
||||
Subject: [PATCH 13/19] net-tcp_bbr: v2: inform CC module of losses repaired by
|
||||
TLP probe
|
||||
|
||||
Before this commit, when there is a packet loss that creates a sequence
|
||||
hole that is filled by a TLP loss probe, then tcp_process_tlp_ack()
|
||||
only informs the congestion control (CC) module via a back-to-back entry
|
||||
and exit of CWR. But some congestion control modules (e.g. BBR) do not
|
||||
respond to CWR events.
|
||||
|
||||
This commit adds a new CA event with which the core TCP stack notifies
|
||||
the CC module when a loss is repaired by a TLP. This will allow CC
|
||||
modules that do not use the CWR mechanism to have a custom handler for
|
||||
such TLP recoveries.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1097,6 +1097,7 @@ enum tcp_ca_event {
|
||||
CA_EVENT_LOSS, /* loss timeout */
|
||||
CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */
|
||||
CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */
|
||||
+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */
|
||||
};
|
||||
|
||||
/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3859,6 +3859,7 @@ static void tcp_process_tlp_ack(struct s
|
||||
/* ACK advances: there was a loss, so reduce cwnd. Reset
|
||||
* tlp_high_seq in tcp_init_cwnd_reduction()
|
||||
*/
|
||||
+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
|
||||
tcp_init_cwnd_reduction(sk);
|
||||
tcp_set_ca_state(sk, TCP_CA_CWR);
|
||||
tcp_end_cwnd_reduction(sk);
|
73
debian/patches/misc-bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch
vendored
Normal file
73
debian/patches/misc-bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
From cab22a8e2e87870e8334a12ffcd0ba04ea81126f Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Mon, 21 Sep 2020 14:46:26 -0400
|
||||
Subject: [PATCH 14/19] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq
|
||||
into rate_sample
|
||||
|
||||
Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will
|
||||
export to the CC module the knowledge of whether the current ACK
|
||||
matched a TLP retransmit.
|
||||
|
||||
Note that when this bool is true, we cannot yet tell (in general) whether
|
||||
this ACK is for the original or the TLP retransmit.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 12 +++++++++---
|
||||
2 files changed, 10 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1161,6 +1161,7 @@ struct rate_sample {
|
||||
u32 last_end_seq; /* end_seq of most recently ACKed packet */
|
||||
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
||||
bool is_retrans; /* is sample from retransmission? */
|
||||
+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */
|
||||
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
||||
bool is_ece; /* did this ACK have ECN marked? */
|
||||
};
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3842,7 +3842,8 @@ static void tcp_replace_ts_recent(struct
|
||||
/* This routine deals with acks during a TLP episode and ends an episode by
|
||||
* resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
|
||||
*/
|
||||
-static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
|
||||
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
|
||||
+ struct rate_sample *rs)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
@@ -3870,6 +3871,11 @@ static void tcp_process_tlp_ack(struct s
|
||||
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
|
||||
/* Pure dupack: original and TLP probe arrived; no loss */
|
||||
tp->tlp_high_seq = 0;
|
||||
+ } else {
|
||||
+ /* This ACK matches a TLP retransmit. We cannot yet tell if
|
||||
+ * this ACK is for the original or the TLP retransmit.
|
||||
+ */
|
||||
+ rs->is_acking_tlp_retrans_seq = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4053,7 +4059,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
tcp_rack_update_reo_wnd(sk, &rs);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
- tcp_process_tlp_ack(sk, ack, flag);
|
||||
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
|
||||
|
||||
if (tcp_ack_is_dubious(sk, flag)) {
|
||||
if (!(flag & (FLAG_SND_UNA_ADVANCED |
|
||||
@@ -4097,7 +4103,7 @@ no_queue:
|
||||
tcp_ack_probe(sk);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
- tcp_process_tlp_ack(sk, ack, flag);
|
||||
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
|
||||
return 1;
|
||||
|
||||
old_ack:
|
112
debian/patches/misc-bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch
vendored
Normal file
112
debian/patches/misc-bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch
vendored
Normal file
@@ -0,0 +1,112 @@
|
||||
From 38dd25482f815d949fec91edd7694b2f15823f67 Mon Sep 17 00:00:00 2001
|
||||
From: David Morley <morleyd@google.com>
|
||||
Date: Fri, 14 Jul 2023 11:07:56 -0400
|
||||
Subject: [PATCH 15/19] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
|
||||
|
||||
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
|
||||
|
||||
This feature indicates that the given destination network is a
|
||||
low-latency ECN environment, meaning both that ECN CE marks are
|
||||
applied by the network using a low-latency marking threshold and also
|
||||
that TCP endpoints provide precise per-data-segment ECN feedback in
|
||||
ACKs (where the ACK ECE flag echoes the received CE status of all
|
||||
newly-acknowledged data segments). This feature indication can be used
|
||||
by congestion control algorithms to decide how to interpret ECN
|
||||
signals over the given destination network.
|
||||
|
||||
This feature is appropriate for datacenter-style ECN marking, such as
|
||||
the ECN marking approach expected by DCTCP or BBR congestion control
|
||||
modules.
|
||||
|
||||
Signed-off-by: David Morley <morleyd@google.com>
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Signed-off-by: Yuchung Cheng <ycheng@google.com>
|
||||
Tested-by: David Morley <morleyd@google.com>
|
||||
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 10 ++++++++++
|
||||
include/uapi/linux/rtnetlink.h | 4 +++-
|
||||
net/ipv4/tcp_minisocks.c | 2 ++
|
||||
net/ipv4/tcp_output.c | 6 ++++--
|
||||
4 files changed, 19 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode
|
||||
#define TCP_ECN_QUEUE_CWR 2
|
||||
#define TCP_ECN_DEMAND_CWR 4
|
||||
#define TCP_ECN_SEEN 8
|
||||
+#define TCP_ECN_LOW 16
|
||||
|
||||
enum tcp_tw_status {
|
||||
TCP_TW_SUCCESS = 0,
|
||||
@@ -777,6 +778,15 @@ static inline void tcp_fast_path_check(s
|
||||
tcp_fast_path_on(tp);
|
||||
}
|
||||
|
||||
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
|
||||
+ const struct dst_entry *dst)
|
||||
+{
|
||||
+ struct tcp_sock *tp = tcp_sk(sk);
|
||||
+
|
||||
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
|
||||
+ tp->ecn_flags |= TCP_ECN_LOW;
|
||||
+}
|
||||
+
|
||||
u32 tcp_delack_max(const struct sock *sk);
|
||||
|
||||
/* Compute the actual rto_min value */
|
||||
--- a/include/uapi/linux/rtnetlink.h
|
||||
+++ b/include/uapi/linux/rtnetlink.h
|
||||
@@ -507,12 +507,14 @@ enum {
|
||||
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
|
||||
#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
|
||||
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
|
||||
+#define RTAX_FEATURE_ECN_LOW (1 << 5)
|
||||
|
||||
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
|
||||
RTAX_FEATURE_SACK | \
|
||||
RTAX_FEATURE_TIMESTAMP | \
|
||||
RTAX_FEATURE_ALLFRAG | \
|
||||
- RTAX_FEATURE_TCP_USEC_TS)
|
||||
+ RTAX_FEATURE_TCP_USEC_TS | \
|
||||
+ RTAX_FEATURE_ECN_LOW)
|
||||
|
||||
struct rta_session {
|
||||
__u8 proto;
|
||||
--- a/net/ipv4/tcp_minisocks.c
|
||||
+++ b/net/ipv4/tcp_minisocks.c
|
||||
@@ -459,6 +459,8 @@ void tcp_ca_openreq_child(struct sock *s
|
||||
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
|
||||
bool ca_got_dst = false;
|
||||
|
||||
+ tcp_set_ecn_low_from_dst(sk, dst);
|
||||
+
|
||||
if (ca_key != TCP_CA_UNSPEC) {
|
||||
const struct tcp_congestion_ops *ca;
|
||||
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
|
||||
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
|
||||
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
|
||||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
|
||||
+ const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
|
||||
if (!use_ecn) {
|
||||
- const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
-
|
||||
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
|
||||
use_ecn = true;
|
||||
}
|
||||
@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
|
||||
tp->ecn_flags = TCP_ECN_OK;
|
||||
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
||||
INET_ECN_xmit(sk);
|
||||
+
|
||||
+ if (dst)
|
||||
+ tcp_set_ecn_low_from_dst(sk, dst);
|
||||
}
|
||||
}
|
||||
|
2821
debian/patches/misc-bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch
vendored
Normal file
2821
debian/patches/misc-bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch
vendored
Normal file
File diff suppressed because it is too large
Load Diff
59
debian/patches/misc-bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch
vendored
Normal file
59
debian/patches/misc-bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
From 99e86f904f246ae9ec7a13d1b920eaf4a8c2d47b Mon Sep 17 00:00:00 2001
|
||||
From: Adithya Abraham Philip <abrahamphilip@google.com>
|
||||
Date: Fri, 11 Jun 2021 21:56:10 +0000
|
||||
Subject: [PATCH 17/19] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT
|
||||
on retransmits
|
||||
|
||||
Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to
|
||||
indicate that retransmitted packets and pure ACKs must have the
|
||||
ECT bit set. This is necessary for BBR, which when using
|
||||
ECN expects ECT to be set even on retransmitted packets and ACKs.
|
||||
|
||||
Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use
|
||||
ECN but don't "need" it did not have a way to indicate that ECT should
|
||||
be set on retransmissions/ACKs.
|
||||
|
||||
Signed-off-by: Adithya Abraham Philip <abrahamphilip@google.com>
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_bbr.c | 3 +++
|
||||
net/ipv4/tcp_output.c | 3 ++-
|
||||
3 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -376,6 +376,7 @@ static inline void tcp_dec_quickack_mode
|
||||
#define TCP_ECN_DEMAND_CWR 4
|
||||
#define TCP_ECN_SEEN 8
|
||||
#define TCP_ECN_LOW 16
|
||||
+#define TCP_ECN_ECT_PERMANENT 32
|
||||
|
||||
enum tcp_tw_status {
|
||||
TCP_TW_SUCCESS = 0,
|
||||
--- a/net/ipv4/tcp_bbr.c
|
||||
+++ b/net/ipv4/tcp_bbr.c
|
||||
@@ -2151,6 +2151,9 @@ __bpf_kfunc static void bbr_init(struct
|
||||
bbr->plb.pause_until = 0;
|
||||
|
||||
tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
|
||||
+
|
||||
+ if (bbr_can_use_ecn(sk))
|
||||
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
|
||||
}
|
||||
|
||||
/* BBR marks the current round trip as a loss round. */
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
|
||||
th->cwr = 1;
|
||||
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
|
||||
}
|
||||
- } else if (!tcp_ca_needs_ecn(sk)) {
|
||||
+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
|
||||
+ !tcp_ca_needs_ecn(sk)) {
|
||||
/* ACK or retransmitted segment: clear ECT|CE */
|
||||
INET_ECN_dontxmit(sk);
|
||||
}
|
38
debian/patches/misc-bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch
vendored
Normal file
38
debian/patches/misc-bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
From 5d7cb61552d374bcaaa95022129b4ca1eace1c33 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 23 Jul 2023 23:25:34 -0400
|
||||
Subject: [PATCH 18/19] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options
|
||||
field
|
||||
|
||||
Analogous to other important ECN information, export TCPI_OPT_ECN_LOW
|
||||
in tcp_info tcpi_options field.
|
||||
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/uapi/linux/tcp.h | 1 +
|
||||
net/ipv4/tcp.c | 2 ++
|
||||
2 files changed, 3 insertions(+)
|
||||
|
||||
--- a/include/uapi/linux/tcp.h
|
||||
+++ b/include/uapi/linux/tcp.h
|
||||
@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail {
|
||||
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
|
||||
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
|
||||
#define TCPI_OPT_USEC_TS 64 /* usec timestamps */
|
||||
+#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN configured at init */
|
||||
|
||||
/*
|
||||
* Sender's congestion state indicating normal or abnormal situations
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -3850,6 +3850,8 @@ void tcp_get_info(struct sock *sk, struc
|
||||
info->tcpi_options |= TCPI_OPT_ECN;
|
||||
if (tp->ecn_flags & TCP_ECN_SEEN)
|
||||
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
|
||||
+ if (tp->ecn_flags & TCP_ECN_LOW)
|
||||
+ info->tcpi_options |= TCPI_OPT_ECN_LOW;
|
||||
if (tp->syn_data_acked)
|
||||
info->tcpi_options |= TCPI_OPT_SYN_DATA;
|
||||
if (tp->tcp_usec_ts)
|
42
debian/patches/misc-bbr3/0019-x86-cfi-bpf-Add-tso_segs-and-skb_marked_lost-to-bpf_.patch
vendored
Normal file
42
debian/patches/misc-bbr3/0019-x86-cfi-bpf-Add-tso_segs-and-skb_marked_lost-to-bpf_.patch
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
From 39838c2f0b09bec02004c092904aada85da2bc2e Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 11 Mar 2024 12:01:13 -0300
|
||||
Subject: [PATCH 19/19] x86/cfi,bpf: Add tso_segs and skb_marked_lost to
|
||||
bpf_struct_ops CFI
|
||||
|
||||
Rebased-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
[ https://github.com/sirlucjan/kernel-patches/blob/master/6.8/bbr3-patches/0001-tcp-bbr3-initial-import.patch ]
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/bpf_tcp_ca.c | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/net/ipv4/bpf_tcp_ca.c
|
||||
+++ b/net/ipv4/bpf_tcp_ca.c
|
||||
@@ -305,11 +305,15 @@ static void bpf_tcp_ca_pkts_acked(struct
|
||||
{
|
||||
}
|
||||
|
||||
-static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
|
||||
+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
|
||||
const struct rate_sample *rs)
|
||||
{
|
||||
@@ -340,7 +344,8 @@ static struct tcp_congestion_ops __bpf_o
|
||||
.cwnd_event = bpf_tcp_ca_cwnd_event,
|
||||
.in_ack_event = bpf_tcp_ca_in_ack_event,
|
||||
.pkts_acked = bpf_tcp_ca_pkts_acked,
|
||||
- .min_tso_segs = bpf_tcp_ca_min_tso_segs,
|
||||
+ .tso_segs = bpf_tcp_ca_tso_segs,
|
||||
+ .skb_marked_lost = bpf_tcp_ca_skb_marked_lost,
|
||||
.cong_control = bpf_tcp_ca_cong_control,
|
||||
.undo_cwnd = bpf_tcp_ca_undo_cwnd,
|
||||
.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
|
Reference in New Issue
Block a user