
add 3rd party/custom patches

3rd-party patches (in alphabetical order):
- bbr3
- ntsync5
- openwrt
- pf-kernel
- xanmod
- zen

no configuration changes for now

commit 8cbaf1dea2 (parent 8082dfeaca)
2024-10-29 05:12:06 +03:00
186 changed files with 43626 additions and 0 deletions


@@ -0,0 +1,52 @@
From ce1cd7869a208112a8728d1fe9e373f78a2e4a6e Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 11 Jun 2019 12:26:55 -0400
Subject: [PATCH 01/19] net-tcp_bbr: broaden app-limited rate sample detection
This commit is a bug fix for the Linux TCP app-limited
(application-limited) logic that is used for collecting rate
(bandwidth) samples.
Previously the app-limited logic only looked for "bubbles" of
silence in between application writes, by checking at the start
of each sendmsg. But "bubbles" of silence can also happen before
retransmits: e.g. bubbles can happen between an application write
and a retransmit, or between two retransmits.
Retransmits are triggered by ACKs or timers. So this commit checks
for bubbles of app-limited silence upon ACKs or timers.
Why does this commit check for app-limited state at the start of
ACKs and timer handling? Because at that point we know whether
inflight was fully using the cwnd. During processing the ACK or
timer event we often change the cwnd; after changing the cwnd we
can't know whether inflight was fully using the old cwnd.
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_timer.c | 1 +
2 files changed, 2 insertions(+)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3961,6 +3961,7 @@ static int tcp_ack(struct sock *sk, cons
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
+ tcp_rate_check_app_limited(sk);
/* ts_recent update must be made after we are sure that the packet
* is in window.
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -689,6 +689,7 @@ void tcp_write_timer_handler(struct sock
return;
}
+ tcp_rate_check_app_limited(sk);
tcp_mstamp_refresh(tcp_sk(sk));
event = icsk->icsk_pending;


@@ -0,0 +1,74 @@
From b32715fbe2ab96d1060ec37bb9c03feedf366494 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 24 Jun 2018 21:55:59 -0400
Subject: [PATCH 02/19] net-tcp_bbr: v2: shrink delivered_mstamp,
first_tx_mstamp to u32 to free up 8 bytes
Free up some space for tracking inflight and losses for each
bw sample, in upcoming commits.
These timestamps are in microseconds, and are now stored in 32
bits. So they can only hold time intervals up to roughly 2^12 = 4096
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit
microsecond implementation approach and resulting deployment
limitations. So this is not introducing a new limit. And these should
not be a limitation for the foreseeable future.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 9 +++++++--
net/ipv4/tcp_rate.c | 7 ++++---
2 files changed, 11 insertions(+), 5 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -884,6 +884,11 @@ static inline u32 tcp_stamp_us_delta(u64
return max_t(s64, t1 - t0, 0);
}
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
+{
+ return max_t(s32, t1 - t0, 0);
+}
+
/* provide the departure time in us unit */
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
{
@@ -973,9 +978,9 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
- u64 first_tx_mstamp;
+ u32 first_tx_mstamp;
/* when we reached the "delivered" count */
- u64 delivered_mstamp;
+ u32 delivered_mstamp;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock
/* Record send time of most recently ACKed packet: */
tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
- scb->tx.first_tx_mstamp);
+ rs->interval_us = tcp_stamp32_us_delta(
+ tp->first_tx_mstamp,
+ scb->tx.first_tx_mstamp);
}
/* Mark off the skb delivered once it's sacked to avoid being
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
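
As a standalone illustration (not part of the patch series), the wrap-safe arithmetic described above can be exercised in plain userspace C; the helper mirrors tcp_stamp32_us_delta(), everything else is an assumption for the example:

/* Illustration only: 32-bit microsecond deltas stay correct across
 * wraparound because the unsigned subtraction is reinterpreted as s32
 * and clamped at 0, as tcp_stamp32_us_delta() does above.
 * 2^32 us is about 4295 s, i.e. roughly the 2^12 s mentioned above.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t stamp32_us_delta(uint32_t t1, uint32_t t0)
{
    int32_t d = (int32_t)(t1 - t0);

    return d > 0 ? (uint32_t)d : 0;
}

int main(void)
{
    uint32_t t0 = 0xFFFFFF00u;  /* just before the 32-bit wrap */
    uint32_t t1 = 0x00000100u;  /* just after the wrap */

    printf("delta across wrap: %u us\n", stamp32_us_delta(t1, t0));    /* 512 */
    printf("reversed order clamps to: %u\n", stamp32_us_delta(t0, t1)); /* 0 */
    return 0;
}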


@@ -0,0 +1,109 @@
From 25856231832186fe13189b986cc0e91860c18201 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 5 Aug 2017 11:49:50 -0400
Subject: [PATCH 03/19] net-tcp_bbr: v2: snapshot packets in flight at transmit
time and pass in rate_sample
CC algorithms may want to snapshot the number of packets in flight at
transmit time and pass in rate_sample, to understand the relationship
between inflight and losses or ECN signals, to try to find the highest
inflight value that has acceptable levels of loss/ECN marking.
We split out the code to set an skb's tx.in_flight field into its own
function, so that this code can be used for the TCP_REPAIR "fake send"
code path that inserts skbs into the rtx queue without sending them.
Effort: net-tcp_bbr
Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea
Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b
Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63
Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 6 ++++++
net/ipv4/tcp_output.c | 1 +
net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++
3 files changed, 27 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -981,6 +981,10 @@ struct tcp_skb_cb {
u32 first_tx_mstamp;
/* when we reached the "delivered" count */
u32 delivered_mstamp;
+#define TCPCB_IN_FLIGHT_BITS 20
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
+ u32 in_flight:20, /* packets in flight at transmit */
+ unused2:12;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1136,6 +1140,7 @@ struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
+ u32 tx_in_flight; /* packets in flight at starting timestamp */
s32 delivered; /* number of packets delivered over interval */
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
@@ -1258,6 +1263,7 @@ static inline void tcp_ca_event(struct s
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
/* From tcp_rate.c */
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2765,6 +2765,7 @@ static bool tcp_write_xmit(struct sock *
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
+ tcp_set_tx_in_flight(sk, skb);
goto repair; /* Skip network transmission */
}
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -34,6 +34,24 @@
* ready to send in the write queue.
*/
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 in_flight;
+
+ /* Check, sanitize, and record packets in flight after skb was sent. */
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
+ "insane in_flight %u cc %s mss %u "
+ "cwnd %u pif %u %u %u %u\n",
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
+ tp->mss_cache, tp->snd_cwnd,
+ tp->packets_out, tp->retrans_out,
+ tp->sacked_out, tp->lost_out))
+ in_flight = TCPCB_IN_FLIGHT_MAX;
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
+}
+
/* Snapshot the current delivery information in the skb, to generate
* a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
*/
@@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk,
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
+ tcp_set_tx_in_flight(sk, skb);
}
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
@@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->tx_in_flight = scb->tx.in_flight;
rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */


@@ -0,0 +1,70 @@
From b1772710e8b5b98c09e96d4f1af620cd938fddf7 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 12 Oct 2017 23:44:27 -0400
Subject: [PATCH 04/19] net-tcp_bbr: v2: count packets lost over TCP rate
sampling interval
For understanding the relationship between inflight and packet loss
signals, to try to find the highest inflight value that has acceptable
levels of packet losses.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab
Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 5 ++++-
net/ipv4/tcp_rate.c | 3 +++
2 files changed, 7 insertions(+), 1 deletion(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -985,6 +985,7 @@ struct tcp_skb_cb {
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
u32 in_flight:20, /* packets in flight at transmit */
unused2:12;
+ u32 lost; /* packets lost so far upon tx of skb */
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1138,11 +1139,13 @@ struct ack_sample {
*/
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
u32 tx_in_flight; /* packets in flight at starting timestamp */
+ s32 lost; /* number of packets lost over interval */
s32 delivered; /* number of packets delivered over interval */
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk,
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
tcp_set_tx_in_flight(sk, skb);
}
@@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock
if (!rs->prior_delivered ||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
scb->end_seq, rs->last_end_seq)) {
+ rs->prior_lost = scb->tx.lost;
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
@@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
return;
}
rs->delivered = tp->delivered - rs->prior_delivered;
+ rs->lost = tp->lost - rs->prior_lost;
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
/* delivered_ce occupies less than 32 bits in the skb control block */


@@ -0,0 +1,38 @@
From fdf01142aea8645186e080f1278d3b5a5fd8c66c Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 19 Nov 2018 13:48:36 -0500
Subject: [PATCH 05/19] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
For understanding the relationship between inflight and ECN signals,
to try to find the highest inflight value that has acceptable levels
of ECN marking.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 1 +
2 files changed, 2 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1157,6 +1157,7 @@ struct rate_sample {
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
+ bool is_ece; /* did this ACK have ECN marked? */
};
struct tcp_congestion_ops {
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4060,6 +4060,7 @@ static int tcp_ack(struct sock *sk, cons
delivered = tcp_newly_delivered(sk, delivered, flag);
lost = tp->lost - lost; /* freshly marked lost */
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
+ rs.is_ece = !!(flag & FLAG_ECE);
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);


@@ -0,0 +1,57 @@
From a3e88432c2ebf12de9c2053a13417ddf2ad4cb4e Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 7 Aug 2018 21:52:06 -0400
Subject: [PATCH 06/19] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
module callback API
For connections experiencing reordering, RACK can mark packets lost
long after we receive the SACKs/ACKs hinting that the packets were
actually lost.
This means that CC modules cannot easily learn the volume of inflight
data at which packet loss happens by looking at the current inflight
or even the packets in flight when the most recently SACKed packet was
sent. To learn this, CC modules need to know how many packets were in
flight at the time lost packets were sent. This new callback, combined
with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
This also provides a consistent callback that is invoked whether
packets are marked lost upon ACK processing, using the RACK reordering
timer, or at RTO time.
Effort: net-tcp_bbr
Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 3 +++
net/ipv4/tcp_input.c | 5 +++++
2 files changed, 8 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1184,6 +1184,9 @@ struct tcp_congestion_ops {
/* override sysctl_tcp_min_tso_segs */
u32 (*min_tso_segs)(struct sock *sk);
+ /* react to a specific lost skb (optional) */
+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
+
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1120,7 +1120,12 @@ static void tcp_verify_retransmit_hint(s
*/
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
{
+ struct sock *sk = (struct sock *)tp;
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+
tp->lost += tcp_skb_pcount(skb);
+ if (ca_ops->skb_marked_lost)
+ ca_ops->skb_marked_lost(sk, skb);
}
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
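
For context, a minimal sketch (not from the patch series) of how a hypothetical out-of-tree CC module could wire up the new hook; the module name "demo_ca", its private state and the lost_inflight_hi bookkeeping are assumptions for illustration:

/* Illustrative sketch only: a toy CC module using the new optional
 * skb_marked_lost() hook to remember the largest "inflight at send
 * time" seen among lost skbs (tx.in_flight comes from patch 03/19).
 */
#include <linux/module.h>
#include <net/tcp.h>

struct demo_ca {
    u32 lost_inflight_hi;   /* max inflight-at-send among lost skbs */
};

static void demo_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
{
    struct demo_ca *ca = inet_csk_ca(sk);

    ca->lost_inflight_hi = max(ca->lost_inflight_hi,
                               (u32)TCP_SKB_CB(skb)->tx.in_flight);
}

static struct tcp_congestion_ops demo_ca_ops __read_mostly = {
    .name            = "demo_ca",
    .owner           = THIS_MODULE,
    .ssthresh        = tcp_reno_ssthresh,
    .cong_avoid      = tcp_reno_cong_avoid,
    .undo_cwnd       = tcp_reno_undo_cwnd,
    .skb_marked_lost = demo_skb_marked_lost,   /* new optional hook */
};

static int __init demo_ca_register(void)
{
    BUILD_BUG_ON(sizeof(struct demo_ca) > ICSK_CA_PRIV_SIZE);
    return tcp_register_congestion_control(&demo_ca_ops);
}

static void __exit demo_ca_unregister(void)
{
    tcp_unregister_congestion_control(&demo_ca_ops);
}

module_init(demo_ca_register);
module_exit(demo_ca_unregister);
MODULE_LICENSE("GPL");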


@@ -0,0 +1,59 @@
From af7d33e71649b8e2ae00dccf336720a8ab891606 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 May 2019 20:16:33 -0400
Subject: [PATCH 07/19] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
tcp_shifted_skb()
When tcp_shifted_skb() updates state as adjacent SACKed skbs are
coalesced, previously the tx.in_flight was not adjusted, so we could
get contradictory state where the skb's recorded pcount was bigger
than the tx.in_flight (the number of segments that were in_flight
after sending the skb).
Normally, having a SACKed skb with contradictory pcount/tx.in_flight
would not matter. However, with SACK reneging, the SACKed bit is
removed, and an skb once again becomes eligible for retransmitting,
fragmenting, SACKing, etc. Packetdrill testing verified the following
sequence is possible in a kernel that does not have this commit:
- skb N is SACKed
- skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
- tcp_shifted_skb() will increase the pcount of prev,
but leave tx.in_flight as-is
- so prev skb can have pcount > tx.in_flight
- RTO, tcp_timeout_mark_lost(), detect reneg,
remove "SACKed" bit, mark skb N as lost
- find pcount of skb N is greater than its tx.in_flight
I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
WARN_ON_ONCE(inflight_prev < 0)
to fire in production machines using bbr2.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/tcp_input.c | 11 +++++++++++
1 file changed, 11 insertions(+)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1506,6 +1506,17 @@ static bool tcp_shifted_skb(struct sock
WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
+ "prev in_flight: %u skb in_flight: %u pcount: %u",
+ TCP_SKB_CB(prev)->tx.in_flight,
+ TCP_SKB_CB(skb)->tx.in_flight,
+ pcount))
+ TCP_SKB_CB(skb)->tx.in_flight = 0;
+ else
+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
+
/* When we're adding to gso_segs == 1, gso_size will be zero,
* in theory this shouldn't be necessary but as long as DSACK
* code can come after this skb later on it's better to keep


@@ -0,0 +1,97 @@
From a4d44bce49f61f8755f558dc40edff5f8958b7c6 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 May 2019 20:16:25 -0400
Subject: [PATCH 08/19] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in
tcp_fragment()
When we fragment an skb that has already been sent, we need to update
the tx.in_flight for the first skb in the resulting pair ("buff").
Because we were not updating the tx.in_flight, the tx.in_flight value
was inconsistent with the pcount of the "buff" skb (tx.in_flight would
be too high). That meant that if the "buff" skb was lost, then
bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value
that is too high. This could result in longer queues and higher packet
loss.
Packetdrill testing verified that without this commit, when the second
half of an skb is SACKed and then later the first half of that skb is
marked lost, the calculated inflight_hi was incorrect.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874
Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup]
Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings
Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 15 +++++++++++++++
net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++-
2 files changed, 40 insertions(+), 1 deletion(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1283,6 +1283,21 @@ static inline bool tcp_skb_sent_after(u6
return t1 > t2 || (t1 == t2 && after(seq1, seq2));
}
+/* If a retransmit failed due to local qdisc congestion or other local issues,
+ * then we may have called tcp_set_skb_tso_segs() to increase the number of
+ * segments in the skb without increasing the tx.in_flight. In all other cases,
+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We
+ * do not have the state to know whether a retransmit failed due to local qdisc
+ * congestion or other local issues, so to avoid spurious warnings we consider
+ * that any skb marked lost may have suffered that fate.
+ */
+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
+ u32 skb_sacked_flags,
+ u32 tx_in_flight)
+{
+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
+}
+
/* These functions determine how the current flow behaves in respect of SACK
* handling. SACK is negotiated with the peer, and therefore it can vary
* between different flows.
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1601,7 +1601,7 @@ int tcp_fragment(struct sock *sk, enum t
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
- int old_factor;
+ int old_factor, inflight_prev;
long limit;
int nlen;
u8 flags;
@@ -1676,6 +1676,30 @@ int tcp_fragment(struct sock *sk, enum t
if (diff)
tcp_adjust_pcount(sk, skb, diff);
+
+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
+ if (inflight_prev < 0) {
+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
+ old_factor,
+ TCP_SKB_CB(skb)->sacked,
+ TCP_SKB_CB(skb)->tx.in_flight),
+ "inconsistent: tx.in_flight: %u "
+ "old_factor: %d mss: %u sacked: %u "
+ "1st pcount: %d 2nd pcount: %d "
+ "1st len: %u 2nd len: %u ",
+ TCP_SKB_CB(skb)->tx.in_flight, old_factor,
+ mss_now, TCP_SKB_CB(skb)->sacked,
+ tcp_skb_pcount(skb), tcp_skb_pcount(buff),
+ skb->len, buff->len);
+ inflight_prev = 0;
+ }
+ /* Set 1st tx.in_flight as if 1st were sent by itself: */
+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
+ tcp_skb_pcount(skb);
+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
+ tcp_skb_pcount(skb) +
+ tcp_skb_pcount(buff);
}
/* Link BUFF into the send queue. */
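
A worked numeric example (illustration only, not from the patch) of the tx.in_flight recomputation above: an skb that was sent with tx.in_flight = 10 and old_factor (pcount before the split) = 4 is fragmented into halves of 3 and 1 segments.

#include <stdio.h>

int main(void)
{
    unsigned int tx_in_flight = 10;  /* in_flight recorded when the skb was sent */
    unsigned int old_factor = 4;     /* pcount of the skb before the split */
    unsigned int pcount_skb = 3;     /* 1st half after tcp_fragment() */
    unsigned int pcount_buff = 1;    /* 2nd half after tcp_fragment() */
    int inflight_prev = (int)tx_in_flight - (int)old_factor;   /* 6 */

    if (inflight_prev < 0)   /* suspicious state: clamp, as the patch warns */
        inflight_prev = 0;

    /* 1st half as if it were sent by itself: 6 + 3 = 9 */
    printf("1st tx.in_flight: %d\n", inflight_prev + (int)pcount_skb);
    /* 2nd half keeps the original total: 6 + 3 + 1 = 10 */
    printf("2nd tx.in_flight: %d\n",
           inflight_prev + (int)pcount_skb + (int)pcount_buff);
    return 0;
}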


@@ -0,0 +1,73 @@
From 65cca0e8fd954a150ec874650af47f7800ea3049 Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Wed, 23 May 2018 17:55:54 -0700
Subject: [PATCH 09/19] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
congestion control module to receive CE events.
Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
bit in opts flag to receive CE events but this may incur changes in ECN
behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
that allows congestion control modules to receive CE events
independently of TCP_CONG_NEEDS_ECN.
Effort: net-tcp
Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 14 +++++++++++++-
net/ipv4/tcp_input.c | 4 ++--
2 files changed, 15 insertions(+), 3 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1119,7 +1119,11 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
+#define TCP_CONG_WANTS_CE_EVENTS 0x4
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
+ TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_WANTS_CE_EVENTS)
union tcp_cc_info;
@@ -1251,6 +1255,14 @@ static inline char *tcp_ca_get_name_by_k
}
#endif
+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
+ TCP_CONG_WANTS_CE_EVENTS);
+}
+
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -370,7 +370,7 @@ static void __tcp_ecn_check_ce(struct so
tcp_enter_quickack_mode(sk, 2);
break;
case INET_ECN_CE:
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_wants_ce_events(sk))
tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
@@ -381,7 +381,7 @@ static void __tcp_ecn_check_ce(struct so
tp->ecn_flags |= TCP_ECN_SEEN;
break;
default:
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_wants_ce_events(sk))
tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
tp->ecn_flags |= TCP_ECN_SEEN;
break;
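
An illustrative fragment (not from the patch series) of a CC module opting in to CE events via the new flag without requesting full TCP_CONG_NEEDS_ECN behaviour; the names demo_ce and ce_events are assumptions:

/* Illustrative fragment only: receive CE events while leaving general
 * ECN negotiation/behaviour unchanged.
 */
#include <linux/module.h>
#include <net/tcp.h>

struct demo_ce {
    u32 ce_events;   /* CA_EVENT_ECN_IS_CE notifications seen */
};

static void demo_ce_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
    struct demo_ce *ca = inet_csk_ca(sk);

    /* Delivered because of TCP_CONG_WANTS_CE_EVENTS, even though the
     * module did not set TCP_CONG_NEEDS_ECN.
     */
    if (ev == CA_EVENT_ECN_IS_CE)
        ca->ce_events++;
}

static struct tcp_congestion_ops demo_ce_ops __read_mostly = {
    .flags       = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS,
    .name        = "demo_ce",
    .owner       = THIS_MODULE,
    .ssthresh    = tcp_reno_ssthresh,
    .cong_avoid  = tcp_reno_cong_avoid,
    .undo_cwnd   = tcp_reno_undo_cwnd,
    .cwnd_event  = demo_ce_cwnd_event,
};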


@@ -0,0 +1,118 @@
From 3acb852e1cfcdeea388bd428c6dd81609fd40792 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 27 Sep 2019 17:10:26 -0400
Subject: [PATCH 10/19] net-tcp: re-generalize TSO sizing in TCP CC module API
Reorganize the API for CC modules so that the CC module once again
gets complete control of the TSO sizing decision. This is how the API
was set up around 2016 and the initial BBRv1 upstreaming. Later Eric
Dumazet simplified it. But with wider testing it now seems that to
avoid CPU regressions BBR needs to have a different TSO sizing
function.
This is necessary to handle cases where there are many flows
bottlenecked on the sender host's NIC, in which case BBR's pacing rate
is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because
BBR's pacing rate adapts to the low bandwidth share each flow sees. By
contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very
large cwnd, and thus large pacing rate and large TSO burst size.
Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 4 ++--
net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++-----------
net/ipv4/tcp_output.c | 11 +++++------
3 files changed, 33 insertions(+), 19 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1185,8 +1185,8 @@ struct tcp_congestion_ops {
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
- u32 (*min_tso_segs)(struct sock *sk);
+ /* pick target number of segments per TSO/GSO skb (optional): */
+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
/* react to a specific lost skb (optional) */
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -301,20 +301,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs(
return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
+ * a particular max gso size as a constraint.
+ */
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
+ u32 gso_max_size)
+{
+ u32 segs;
+ u64 bytes;
+
+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
+
+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
+ return segs;
+}
+
+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
+}
+
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 segs, bytes;
-
- /* Sort of tcp_tso_autosize() but ignoring
- * driver provided sk_gso_max_size.
- */
- bytes = min_t(unsigned long,
- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- return min(segs, 0x7FU);
+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -1150,7 +1165,7 @@ static struct tcp_congestion_ops tcp_bbr
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .min_tso_segs = bbr_min_tso_segs,
+ .tso_segs = bbr_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2057,13 +2057,12 @@ static u32 tcp_tso_autosize(const struct
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 min_tso, tso_segs;
+ u32 tso_segs;
- min_tso = ca_ops->min_tso_segs ?
- ca_ops->min_tso_segs(sk) :
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
-
- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ tso_segs = ca_ops->tso_segs ?
+ ca_ops->tso_segs(sk, mss_now) :
+ tcp_tso_autosize(sk, mss_now,
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
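
A worked example (illustration only) of the pacing-rate-based TSO budget computed by bbr_tso_segs_generic() above, assuming the kernel's default sk_pacing_shift of 10 and a 1 Gbit/s pacing rate; the kernel additionally caps the budget by gso_max_size and sk_gso_max_segs:

#include <stdio.h>

int main(void)
{
    unsigned long pacing_rate = 125000000UL; /* 1 Gbit/s in bytes/sec */
    unsigned int pacing_shift = 10;          /* default: ~1/1024 s of data */
    unsigned int mss = 1448, min_segs = 2;   /* bbr_min_tso_segs() at high rate */
    unsigned long bytes = pacing_rate >> pacing_shift;   /* ~122070 bytes */
    unsigned long segs = bytes / mss;                    /* ~84 segments */

    if (segs < min_segs)
        segs = min_segs;
    printf("TSO burst budget: %lu bytes -> %lu segments\n", bytes, segs);
    return 0;
}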


@@ -0,0 +1,72 @@
From 3741ada76bab5111cbb9c279cf27e67a0334eb05 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 7 Jan 2024 21:11:26 -0300
Subject: [PATCH 11/19] net-tcp: add fast_ack_mode=1: skip rwin check in
tcp_fast_ack_mode__tcp_ack_snd_check()
Add logic for an experimental TCP connection behavior, enabled with
tp->fast_ack_mode = 1, which disables checking the receive window
before sending an ack in __tcp_ack_snd_check(). If this behavior is
enabled, the data receiver sends an ACK if the amount of data is >
RCV.MSS.
Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/tcp.h | 3 ++-
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_cong.c | 1 +
net/ipv4/tcp_input.c | 5 +++--
4 files changed, 7 insertions(+), 3 deletions(-)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -369,7 +369,8 @@ struct tcp_sock {
u8 compressed_ack;
u8 dup_ack_counter:2,
tlp_retrans:1, /* TLP is a retransmission */
- unused:5;
+ fast_ack_mode:2, /* which fast ack mode ? */
+ unused:3;
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3123,6 +3123,7 @@ int tcp_disconnect(struct sock *sk, int
tp->rx_opt.dsack = 0;
tp->rx_opt.num_sacks = 0;
tp->rcv_ooopack = 0;
+ tp->fast_ack_mode = 0;
/* Clean up fastopen related fields */
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_sk(sk)->prior_ssthresh = 0;
+ tcp_sk(sk)->fast_ack_mode = 0;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5763,13 +5763,14 @@ static void __tcp_ack_snd_check(struct s
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
+ (tp->fast_ack_mode == 1 ||
/* ... and right edge of window advances far enough.
* (tcp_recvmsg() will send ACK otherwise).
* If application uses SO_RCVLOWAT, we want send ack now if
* we have not received enough bytes to satisfy the condition.
*/
- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* Protocol state mandates a one-time immediate ACK */


@@ -0,0 +1,45 @@
From e5d35b7c882b7001f8a31b14c9f08917230dedc3 Mon Sep 17 00:00:00 2001
From: Jianfeng Wang <jfwang@google.com>
Date: Fri, 19 Jun 2020 17:33:45 +0000
Subject: [PATCH 12/19] net-tcp_bbr: v2: record app-limited status of
TLP-repaired flight
When sending a TLP retransmit, record whether the outstanding flight
of data is application limited. This is important for congestion
control modules that want to respond to losses repaired by TLP
retransmits. This is important because the following scenarios convey
very different information:
(1) a packet loss with a small number of packets in flight;
(2) a packet loss with the maximum amount of data in flight allowed
by the CC module.
Effort: net-tcp_bbr
Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/tcp.h | 3 ++-
net/ipv4/tcp_output.c | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -370,7 +370,8 @@ struct tcp_sock {
u8 dup_ack_counter:2,
tlp_retrans:1, /* TLP is a retransmission */
fast_ack_mode:2, /* which fast ack mode ? */
- unused:3;
+ tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */
+ unused:2;
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3003,6 +3003,7 @@ void tcp_send_loss_probe(struct sock *sk
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
if (__tcp_retransmit_skb(sk, skb, 1))
goto rearm_timer;


@@ -0,0 +1,45 @@
From 77e7c22b63f8934206b1e89c173558c3967f0779 Mon Sep 17 00:00:00 2001
From: Jianfeng Wang <jfwang@google.com>
Date: Tue, 16 Jun 2020 17:41:19 +0000
Subject: [PATCH 13/19] net-tcp_bbr: v2: inform CC module of losses repaired by
TLP probe
Before this commit, when there is a packet loss that creates a sequence
hole that is filled by a TLP loss probe, then tcp_process_tlp_ack()
only informs the congestion control (CC) module via a back-to-back entry
and exit of CWR. But some congestion control modules (e.g. BBR) do not
respond to CWR events.
This commit adds a new CA event with which the core TCP stack notifies
the CC module when a loss is repaired by a TLP. This will allow CC
modules that do not use the CWR mechanism to have a custom handler for
such TLP recoveries.
Effort: net-tcp_bbr
Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 1 +
2 files changed, 2 insertions(+)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1097,6 +1097,7 @@ enum tcp_ca_event {
CA_EVENT_LOSS, /* loss timeout */
CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */
CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */
+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */
};
/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3859,6 +3859,7 @@ static void tcp_process_tlp_ack(struct s
/* ACK advances: there was a loss, so reduce cwnd. Reset
* tlp_high_seq in tcp_init_cwnd_reduction()
*/
+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
tcp_init_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);


@@ -0,0 +1,73 @@
From cab22a8e2e87870e8334a12ffcd0ba04ea81126f Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 21 Sep 2020 14:46:26 -0400
Subject: [PATCH 14/19] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq
into rate_sample
Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will
export to the CC module the knowledge of whether the current ACK
matched a TLP retransmit.
Note that when this bool is true, we cannot yet tell (in general) whether
this ACK is for the original or the TLP retransmit.
Effort: net-tcp_bbr
Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 12 +++++++++---
2 files changed, 10 insertions(+), 3 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1161,6 +1161,7 @@ struct rate_sample {
u32 last_end_seq; /* end_seq of most recently ACKed packet */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
bool is_ece; /* did this ACK have ECN marked? */
};
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3842,7 +3842,8 @@ static void tcp_replace_ts_recent(struct
/* This routine deals with acks during a TLP episode and ends an episode by
* resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
*/
-static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
+ struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -3870,6 +3871,11 @@ static void tcp_process_tlp_ack(struct s
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
/* Pure dupack: original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
+ } else {
+ /* This ACK matches a TLP retransmit. We cannot yet tell if
+ * this ACK is for the original or the TLP retransmit.
+ */
+ rs->is_acking_tlp_retrans_seq = 1;
}
}
@@ -4053,7 +4059,7 @@ static int tcp_ack(struct sock *sk, cons
tcp_rack_update_reo_wnd(sk, &rs);
if (tp->tlp_high_seq)
- tcp_process_tlp_ack(sk, ack, flag);
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
if (tcp_ack_is_dubious(sk, flag)) {
if (!(flag & (FLAG_SND_UNA_ADVANCED |
@@ -4097,7 +4103,7 @@ no_queue:
tcp_ack_probe(sk);
if (tp->tlp_high_seq)
- tcp_process_tlp_ack(sk, ack, flag);
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
return 1;
old_ack:


@@ -0,0 +1,112 @@
From 38dd25482f815d949fec91edd7694b2f15823f67 Mon Sep 17 00:00:00 2001
From: David Morley <morleyd@google.com>
Date: Fri, 14 Jul 2023 11:07:56 -0400
Subject: [PATCH 15/19] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
This feature indicates that the given destination network is a
low-latency ECN environment, meaning both that ECN CE marks are
applied by the network using a low-latency marking threshold and also
that TCP endpoints provide precise per-data-segment ECN feedback in
ACKs (where the ACK ECE flag echoes the received CE status of all
newly-acknowledged data segments). This feature indication can be used
by congestion control algorithms to decide how to interpret ECN
signals over the given destination network.
This feature is appropriate for datacenter-style ECN marking, such as
the ECN marking approach expected by DCTCP or BBR congestion control
modules.
Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 10 ++++++++++
include/uapi/linux/rtnetlink.h | 4 +++-
net/ipv4/tcp_minisocks.c | 2 ++
net/ipv4/tcp_output.c | 6 ++++--
4 files changed, 19 insertions(+), 3 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
+#define TCP_ECN_LOW 16
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
@@ -777,6 +778,15 @@ static inline void tcp_fast_path_check(s
tcp_fast_path_on(tp);
}
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
+ const struct dst_entry *dst)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
+ tp->ecn_flags |= TCP_ECN_LOW;
+}
+
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -507,12 +507,14 @@ enum {
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
+#define RTAX_FEATURE_ECN_LOW (1 << 5)
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
RTAX_FEATURE_SACK | \
RTAX_FEATURE_TIMESTAMP | \
RTAX_FEATURE_ALLFRAG | \
- RTAX_FEATURE_TCP_USEC_TS)
+ RTAX_FEATURE_TCP_USEC_TS | \
+ RTAX_FEATURE_ECN_LOW)
struct rta_session {
__u8 proto;
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -459,6 +459,8 @@ void tcp_ca_openreq_child(struct sock *s
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
bool ca_got_dst = false;
+ tcp_set_ecn_low_from_dst(sk, dst);
+
if (ca_key != TCP_CA_UNSPEC) {
const struct tcp_congestion_ops *ca;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+ const struct dst_entry *dst = __sk_dst_get(sk);
if (!use_ecn) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);
+
+ if (dst)
+ tcp_set_ecn_low_from_dst(sk, dst);
}
}
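
An illustrative fragment (not from the patch) of how a CC module's init hook might consult the per-route low-latency ECN hint stored by this patch; the demo_ecn structure and its ecn_low bit are assumptions for the example:

#include <net/tcp.h>

struct demo_ecn {
    u32 ecn_low:1;   /* destination uses low-latency ECN semantics */
};

static void demo_ecn_init(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct demo_ecn *ca = inet_csk_ca(sk);

    /* TCP_ECN_LOW is set from RTAX_FEATURE_ECN_LOW on the route by
     * tcp_set_ecn_low_from_dst() when the connection is created.
     */
    ca->ecn_low = !!(tp->ecn_flags & TCP_ECN_LOW);
}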

[Patch 16/19: file diff suppressed because it is too large to display.]


@@ -0,0 +1,59 @@
From 99e86f904f246ae9ec7a13d1b920eaf4a8c2d47b Mon Sep 17 00:00:00 2001
From: Adithya Abraham Philip <abrahamphilip@google.com>
Date: Fri, 11 Jun 2021 21:56:10 +0000
Subject: [PATCH 17/19] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT
on retransmits
Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to
indicate that retransmitted packets and pure ACKs must have the
ECT bit set. This is necessary for BBR, which when using
ECN expects ECT to be set even on retransmitted packets and ACKs.
Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use
ECN but don't "need" it did not have a way to indicate that ECT should
be set on retransmissions/ACKs.
Signed-off-by: Adithya Abraham Philip <abrahamphilip@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_bbr.c | 3 +++
net/ipv4/tcp_output.c | 3 ++-
3 files changed, 6 insertions(+), 1 deletion(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -376,6 +376,7 @@ static inline void tcp_dec_quickack_mode
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
#define TCP_ECN_LOW 16
+#define TCP_ECN_ECT_PERMANENT 32
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -2151,6 +2151,9 @@ __bpf_kfunc static void bbr_init(struct
bbr->plb.pause_until = 0;
tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
+
+ if (bbr_can_use_ecn(sk))
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
}
/* BBR marks the current round trip as a loss round. */
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
- } else if (!tcp_ca_needs_ecn(sk)) {
+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
+ !tcp_ca_needs_ecn(sk)) {
/* ACK or retransmitted segment: clear ECT|CE */
INET_ECN_dontxmit(sk);
}


@@ -0,0 +1,38 @@
From 5d7cb61552d374bcaaa95022129b4ca1eace1c33 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 23 Jul 2023 23:25:34 -0400
Subject: [PATCH 18/19] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options
field
Analogous to other important ECN information, export TCPI_OPT_ECN_LOW
in tcp_info tcpi_options field.
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/uapi/linux/tcp.h | 1 +
net/ipv4/tcp.c | 2 ++
2 files changed, 3 insertions(+)
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail {
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
#define TCPI_OPT_USEC_TS 64 /* usec timestamps */
+#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN configured at init */
/*
* Sender's congestion state indicating normal or abnormal situations
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3850,6 +3850,8 @@ void tcp_get_info(struct sock *sk, struc
info->tcpi_options |= TCPI_OPT_ECN;
if (tp->ecn_flags & TCP_ECN_SEEN)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
+ if (tp->ecn_flags & TCP_ECN_LOW)
+ info->tcpi_options |= TCPI_OPT_ECN_LOW;
if (tp->syn_data_acked)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
if (tp->tcp_usec_ts)
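
A userspace sketch (not part of the patch) for reading the new bit from TCP_INFO on a connected socket; the fallback #define covers UAPI headers that predate this patch:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCPI_OPT_ECN_LOW
#define TCPI_OPT_ECN_LOW 128   /* value added by this patch */
#endif

/* Print whether the kernel reports low-latency ECN for this connection. */
int print_ecn_low(int fd)
{
    struct tcp_info ti;
    socklen_t len = sizeof(ti);

    memset(&ti, 0, sizeof(ti));
    if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) < 0)
        return -1;

    printf("ecn_low: %s\n",
           (ti.tcpi_options & TCPI_OPT_ECN_LOW) ? "yes" : "no");
    return 0;
}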


@@ -0,0 +1,42 @@
From 39838c2f0b09bec02004c092904aada85da2bc2e Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 11 Mar 2024 12:01:13 -0300
Subject: [PATCH 19/19] x86/cfi,bpf: Add tso_segs and skb_marked_lost to
bpf_struct_ops CFI
Rebased-by: Oleksandr Natalenko <oleksandr@natalenko.name>
[ https://github.com/sirlucjan/kernel-patches/blob/master/6.8/bbr3-patches/0001-tcp-bbr3-initial-import.patch ]
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
net/ipv4/bpf_tcp_ca.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -305,11 +305,15 @@ static void bpf_tcp_ca_pkts_acked(struct
{
}
-static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
{
return 0;
}
+static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
+{
+}
+
static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
const struct rate_sample *rs)
{
@@ -340,7 +344,8 @@ static struct tcp_congestion_ops __bpf_o
.cwnd_event = bpf_tcp_ca_cwnd_event,
.in_ack_event = bpf_tcp_ca_in_ack_event,
.pkts_acked = bpf_tcp_ca_pkts_acked,
- .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+ .tso_segs = bpf_tcp_ca_tso_segs,
+ .skb_marked_lost = bpf_tcp_ca_skb_marked_lost,
.cong_control = bpf_tcp_ca_cong_control,
.undo_cwnd = bpf_tcp_ca_undo_cwnd,
.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,