release 6.16.3 (preliminary)
This commit is contained in:
@@ -46,7 +46,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
Binderfs is a pseudo-filesystem for the Android Binder IPC driver
|
||||
--- a/drivers/android/binder.c
|
||||
+++ b/drivers/android/binder.c
|
||||
@@ -7043,9 +7043,20 @@ err_alloc_device_names_failed:
|
||||
@@ -7107,9 +7107,20 @@ err_alloc_device_names_failed:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -185,7 +185,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
return container_of(ns, struct ipc_namespace, ns);
|
||||
--- a/mm/vmalloc.c
|
||||
+++ b/mm/vmalloc.c
|
||||
@@ -3190,6 +3190,7 @@ struct vm_struct *get_vm_area(unsigned l
|
||||
@@ -3231,6 +3231,7 @@ struct vm_struct *get_vm_area(unsigned l
|
||||
NUMA_NO_NODE, GFP_KERNEL,
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From a78b8dee94d4742c4696c55c0eec964802e812ac Mon Sep 17 00:00:00 2001
|
||||
From 5d06324b94cc7c38e4a7943c1a0f25b118819afc Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 27 Feb 2023 01:38:18 +0000
|
||||
Subject: [PATCH 1/2] netfilter: Add netfilter nf_tables fullcone support
|
||||
Subject: [PATCH] netfilter: Add netfilter nf_tables fullcone support
|
||||
|
||||
Signed-off-by: Syrone Wong <wong.syrone@gmail.com>
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
@@ -1482,9 +1482,9 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
+ enum ip_conntrack_info ctinfo;
|
||||
+ struct nf_conn_nat *nat;
|
||||
+ struct nf_conntrack_tuple *ct_tuple, *ct_tuple_origin;
|
||||
+ uint16_t port, original_port, want_port;
|
||||
+ uint16_t port, original_port, want_port = 0;
|
||||
+ uint8_t protonum;
|
||||
+ bool is_src_mapping_active;
|
||||
+ bool is_src_mapping_active = false;
|
||||
+
|
||||
+ /* NFPROTO specific def */
|
||||
+ struct nat_mapping *mapping, *src_mapping;
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From 242e385bcd49ee7ea5332b27864f81aab9b11718 Mon Sep 17 00:00:00 2001
|
||||
From b735384e46e7fe572615643b466ec6b3c410256a Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Tue, 20 Feb 2018 15:56:02 +0100
|
||||
Subject: [PATCH 2/2] netfilter: add xt_FLOWOFFLOAD target
|
||||
Subject: [PATCH] netfilter: add xt_FLOWOFFLOAD target
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
@@ -11,8 +11,8 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
net/netfilter/Kconfig | 9 +
|
||||
net/netfilter/Makefile | 1 +
|
||||
net/netfilter/nf_flow_table_core.c | 5 +-
|
||||
net/netfilter/xt_FLOWOFFLOAD.c | 698 ++++++++++++++++++
|
||||
6 files changed, 732 insertions(+), 3 deletions(-)
|
||||
net/netfilter/xt_FLOWOFFLOAD.c | 701 ++++++++++++++++++
|
||||
6 files changed, 735 insertions(+), 3 deletions(-)
|
||||
create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
|
||||
create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
|
||||
|
||||
@@ -108,7 +108,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
{
|
||||
--- /dev/null
|
||||
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
|
||||
@@ -0,0 +1,698 @@
|
||||
@@ -0,0 +1,701 @@
|
||||
+/*
|
||||
+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
|
||||
+ *
|
||||
@@ -578,7 +578,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
+ const struct xt_flowoffload_target_info *info = par->targinfo;
|
||||
+ struct tcphdr _tcph, *tcph = NULL;
|
||||
+ enum ip_conntrack_info ctinfo;
|
||||
+ enum ip_conntrack_dir dir;
|
||||
+ enum ip_conntrack_dir dir = IP_CT_DIR_ORIGINAL;
|
||||
+ struct nf_flow_route route = {};
|
||||
+ struct flow_offload *flow = NULL;
|
||||
+ struct net_device *devs[2] = {};
|
||||
@@ -804,6 +804,9 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
+ nf_flow_table_free(&flowtable[1].ft);
|
||||
+}
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+module_init(xt_flowoffload_tg_init);
|
||||
+module_exit(xt_flowoffload_tg_exit);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+MODULE_AUTHOR("Felix Fietkau <nbd@nbd.name>");
|
||||
+MODULE_DESCRIPTION("Netfilter FLOWOFFLOAD nf_flow_offload module");
|
||||
|
@@ -1,52 +0,0 @@
|
||||
From 1e164adec73236b05d5b84846a460082d3d211d2 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Tue, 11 Jun 2019 12:26:55 -0400
|
||||
Subject: [PATCH 01/18] net-tcp_bbr: broaden app-limited rate sample detection
|
||||
|
||||
This commit is a bug fix for the Linux TCP app-limited
|
||||
(application-limited) logic that is used for collecting rate
|
||||
(bandwidth) samples.
|
||||
|
||||
Previously the app-limited logic only looked for "bubbles" of
|
||||
silence in between application writes, by checking at the start
|
||||
of each sendmsg. But "bubbles" of silence can also happen before
|
||||
retransmits: e.g. bubbles can happen between an application write
|
||||
and a retransmit, or between two retransmits.
|
||||
|
||||
Retransmits are triggered by ACKs or timers. So this commit checks
|
||||
for bubbles of app-limited silence upon ACKs or timers.
|
||||
|
||||
Why does this commit check for app-limited state at the start of
|
||||
ACKs and timer handling? Because at that point we know whether
|
||||
inflight was fully using the cwnd. During processing the ACK or
|
||||
timer event we often change the cwnd; after changing the cwnd we
|
||||
can't know whether inflight was fully using the old cwnd.
|
||||
|
||||
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
|
||||
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
net/ipv4/tcp_timer.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -4003,6 +4003,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
|
||||
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
|
||||
rs.prior_in_flight = tcp_packets_in_flight(tp);
|
||||
+ tcp_rate_check_app_limited(sk);
|
||||
|
||||
/* ts_recent update must be made after we are sure that the packet
|
||||
* is in window.
|
||||
--- a/net/ipv4/tcp_timer.c
|
||||
+++ b/net/ipv4/tcp_timer.c
|
||||
@@ -702,6 +702,7 @@ void tcp_write_timer_handler(struct sock
|
||||
icsk_timeout(icsk));
|
||||
return;
|
||||
}
|
||||
+ tcp_rate_check_app_limited(sk);
|
||||
tcp_mstamp_refresh(tcp_sk(sk));
|
||||
event = icsk->icsk_pending;
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,74 +0,0 @@
|
||||
From e02830baf6c32aceea6c0dfe8a0a4b96db171418 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 24 Jun 2018 21:55:59 -0400
|
||||
Subject: [PATCH 02/18] net-tcp_bbr: v2: shrink delivered_mstamp,
|
||||
first_tx_mstamp to u32 to free up 8 bytes
|
||||
|
||||
Free up some space for tracking inflight and losses for each
|
||||
bw sample, in upcoming commits.
|
||||
|
||||
These timestamps are in microseconds, and are now stored in 32
|
||||
bits. So they can only hold time intervals up to roughly 2^12 = 4096
|
||||
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit
|
||||
microsecond implementation approach and resulting deployment
|
||||
limitations. So this is not introducing a new limit. And these should
|
||||
not be a limitation for the foreseeable future.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55
|
||||
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 9 +++++++--
|
||||
net/ipv4/tcp_rate.c | 7 ++++---
|
||||
2 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -945,6 +945,11 @@ static inline u32 tcp_stamp_us_delta(u64
|
||||
return max_t(s64, t1 - t0, 0);
|
||||
}
|
||||
|
||||
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
|
||||
+{
|
||||
+ return max_t(s32, t1 - t0, 0);
|
||||
+}
|
||||
+
|
||||
/* provide the departure time in us unit */
|
||||
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
|
||||
{
|
||||
@@ -1043,9 +1048,9 @@ struct tcp_skb_cb {
|
||||
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
|
||||
__u32 delivered;
|
||||
/* start of send pipeline phase */
|
||||
- u64 first_tx_mstamp;
|
||||
+ u32 first_tx_mstamp;
|
||||
/* when we reached the "delivered" count */
|
||||
- u64 delivered_mstamp;
|
||||
+ u32 delivered_mstamp;
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock
|
||||
/* Record send time of most recently ACKed packet: */
|
||||
tp->first_tx_mstamp = tx_tstamp;
|
||||
/* Find the duration of the "send phase" of this window: */
|
||||
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
|
||||
- scb->tx.first_tx_mstamp);
|
||||
+ rs->interval_us = tcp_stamp32_us_delta(
|
||||
+ tp->first_tx_mstamp,
|
||||
+ scb->tx.first_tx_mstamp);
|
||||
|
||||
}
|
||||
/* Mark off the skb delivered once it's sacked to avoid being
|
||||
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
|
||||
* longer phase.
|
||||
*/
|
||||
snd_us = rs->interval_us; /* send phase */
|
||||
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
|
||||
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
|
||||
rs->prior_mstamp); /* ack phase */
|
||||
rs->interval_us = max(snd_us, ack_us);
|
||||
|
@@ -1,109 +0,0 @@
|
||||
From b39e9e473ed0446e4268cffb09f4a260cea7c341 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sat, 5 Aug 2017 11:49:50 -0400
|
||||
Subject: [PATCH 03/18] net-tcp_bbr: v2: snapshot packets in flight at transmit
|
||||
time and pass in rate_sample
|
||||
|
||||
CC algorithms may want to snapshot the number of packets in flight at
|
||||
transmit time and pass in rate_sample, to understand the relationship
|
||||
between inflight and losses or ECN signals, to try to find the highest
|
||||
inflight value that has acceptable levels of loss/ECN marking.
|
||||
|
||||
We split out the code to set an skb's tx.in_flight field into its own
|
||||
function, so that this code can be used for the TCP_REPAIR "fake send"
|
||||
code path that inserts skbs into the rtx queue without sending them.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea
|
||||
Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b
|
||||
Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63
|
||||
Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 6 ++++++
|
||||
net/ipv4/tcp_output.c | 1 +
|
||||
net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++
|
||||
3 files changed, 27 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1051,6 +1051,10 @@ struct tcp_skb_cb {
|
||||
u32 first_tx_mstamp;
|
||||
/* when we reached the "delivered" count */
|
||||
u32 delivered_mstamp;
|
||||
+#define TCPCB_IN_FLIGHT_BITS 20
|
||||
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
|
||||
+ u32 in_flight:20, /* packets in flight at transmit */
|
||||
+ unused2:12;
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
@@ -1207,6 +1211,7 @@ struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
+ u32 tx_in_flight; /* packets in flight at starting timestamp */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
@@ -1329,6 +1334,7 @@ static inline void tcp_ca_event(struct s
|
||||
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
|
||||
|
||||
/* From tcp_rate.c */
|
||||
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
|
||||
struct rate_sample *rs);
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -2777,6 +2777,7 @@ static bool tcp_write_xmit(struct sock *
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
|
||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||
tcp_init_tso_segs(skb, mss_now);
|
||||
+ tcp_set_tx_in_flight(sk, skb);
|
||||
goto repair; /* Skip network transmission */
|
||||
}
|
||||
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -34,6 +34,24 @@
|
||||
* ready to send in the write queue.
|
||||
*/
|
||||
|
||||
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
|
||||
+{
|
||||
+ struct tcp_sock *tp = tcp_sk(sk);
|
||||
+ u32 in_flight;
|
||||
+
|
||||
+ /* Check, sanitize, and record packets in flight after skb was sent. */
|
||||
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
|
||||
+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
|
||||
+ "insane in_flight %u cc %s mss %u "
|
||||
+ "cwnd %u pif %u %u %u %u\n",
|
||||
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
|
||||
+ tp->mss_cache, tp->snd_cwnd,
|
||||
+ tp->packets_out, tp->retrans_out,
|
||||
+ tp->sacked_out, tp->lost_out))
|
||||
+ in_flight = TCPCB_IN_FLIGHT_MAX;
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
|
||||
+}
|
||||
+
|
||||
/* Snapshot the current delivery information in the skb, to generate
|
||||
* a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
|
||||
*/
|
||||
@@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk,
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
+ tcp_set_tx_in_flight(sk, skb);
|
||||
}
|
||||
|
||||
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
|
||||
@@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
rs->is_app_limited = scb->tx.is_app_limited;
|
||||
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
|
||||
+ rs->tx_in_flight = scb->tx.in_flight;
|
||||
rs->last_end_seq = scb->end_seq;
|
||||
|
||||
/* Record send time of most recently ACKed packet: */
|
@@ -1,70 +0,0 @@
|
||||
From e4c82a08a05eeee9341511f35e922914520dd401 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Thu, 12 Oct 2017 23:44:27 -0400
|
||||
Subject: [PATCH 04/18] net-tcp_bbr: v2: count packets lost over TCP rate
|
||||
sampling interval
|
||||
|
||||
For understanding the relationship between inflight and packet loss
|
||||
signals, to try to find the highest inflight value that has acceptable
|
||||
levels of packet losses.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab
|
||||
Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 5 ++++-
|
||||
net/ipv4/tcp_rate.c | 3 +++
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1055,6 +1055,7 @@ struct tcp_skb_cb {
|
||||
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
|
||||
u32 in_flight:20, /* packets in flight at transmit */
|
||||
unused2:12;
|
||||
+ u32 lost; /* packets lost so far upon tx of skb */
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
@@ -1209,11 +1210,13 @@ struct ack_sample {
|
||||
*/
|
||||
struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
u32 tx_in_flight; /* packets in flight at starting timestamp */
|
||||
+ s32 lost; /* number of packets lost over interval */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
u32 snd_interval_us; /* snd interval for delivered packets */
|
||||
u32 rcv_interval_us; /* rcv interval for delivered packets */
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk,
|
||||
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
tcp_set_tx_in_flight(sk, skb);
|
||||
}
|
||||
@@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock
|
||||
if (!rs->prior_delivered ||
|
||||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
|
||||
scb->end_seq, rs->last_end_seq)) {
|
||||
+ rs->prior_lost = scb->tx.lost;
|
||||
rs->prior_delivered_ce = scb->tx.delivered_ce;
|
||||
rs->prior_delivered = scb->tx.delivered;
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
@@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
|
||||
return;
|
||||
}
|
||||
rs->delivered = tp->delivered - rs->prior_delivered;
|
||||
+ rs->lost = tp->lost - rs->prior_lost;
|
||||
|
||||
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
|
||||
/* delivered_ce occupies less than 32 bits in the skb control block */
|
@@ -1,38 +0,0 @@
|
||||
From 3d3b54d263b6271926e0d9800ca000a529267cfe Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Mon, 19 Nov 2018 13:48:36 -0500
|
||||
Subject: [PATCH 05/18] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
|
||||
|
||||
For understanding the relationship between inflight and ECN signals,
|
||||
to try to find the highest inflight value that has acceptable levels of
|
||||
ECN marking.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
|
||||
Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1228,6 +1228,7 @@ struct rate_sample {
|
||||
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
||||
bool is_retrans; /* is sample from retransmission? */
|
||||
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
||||
+ bool is_ece; /* did this ACK have ECN marked? */
|
||||
};
|
||||
|
||||
struct tcp_congestion_ops {
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -4093,6 +4093,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
delivered = tcp_newly_delivered(sk, delivered, flag);
|
||||
lost = tp->lost - lost; /* freshly marked lost */
|
||||
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
|
||||
+ rs.is_ece = !!(flag & FLAG_ECE);
|
||||
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
|
||||
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
|
||||
tcp_xmit_recovery(sk, rexmit);
|
@@ -1,57 +0,0 @@
|
||||
From 6f3ecfa5c713cacd451b876f4190da6d36c512eb Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Tue, 7 Aug 2018 21:52:06 -0400
|
||||
Subject: [PATCH 06/18] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
|
||||
module callback API
|
||||
|
||||
For connections experiencing reordering, RACK can mark packets lost
|
||||
long after we receive the SACKs/ACKs hinting that the packets were
|
||||
actually lost.
|
||||
|
||||
This means that CC modules cannot easily learn the volume of inflight
|
||||
data at which packet loss happens by looking at the current inflight
|
||||
or even the packets in flight when the most recently SACKed packet was
|
||||
sent. To learn this, CC modules need to know how many packets were in
|
||||
flight at the time lost packets were sent. This new callback, combined
|
||||
with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
|
||||
|
||||
This also provides a consistent callback that is invoked whether
|
||||
packets are marked lost upon ACK processing, using the RACK reordering
|
||||
timer, or at RTO time.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
|
||||
Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 3 +++
|
||||
net/ipv4/tcp_input.c | 5 +++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1255,6 +1255,9 @@ struct tcp_congestion_ops {
|
||||
/* override sysctl_tcp_min_tso_segs */
|
||||
u32 (*min_tso_segs)(struct sock *sk);
|
||||
|
||||
+ /* react to a specific lost skb (optional) */
|
||||
+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
|
||||
+
|
||||
/* call when packets are delivered to update cwnd and pacing rate,
|
||||
* after all the ca_state processing. (optional)
|
||||
*/
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -1135,7 +1135,12 @@ static void tcp_verify_retransmit_hint(s
|
||||
*/
|
||||
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
|
||||
{
|
||||
+ struct sock *sk = (struct sock *)tp;
|
||||
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||
+
|
||||
tp->lost += tcp_skb_pcount(skb);
|
||||
+ if (ca_ops->skb_marked_lost)
|
||||
+ ca_ops->skb_marked_lost(sk, skb);
|
||||
}
|
||||
|
||||
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
|
@@ -1,59 +0,0 @@
|
||||
From 52b39284416d0d841d4d51db0d3fce1c191a00d7 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Wed, 1 May 2019 20:16:33 -0400
|
||||
Subject: [PATCH 07/18] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
|
||||
tcp_shifted_skb()
|
||||
|
||||
When tcp_shifted_skb() updates state as adjacent SACKed skbs are
|
||||
coalesced, previously the tx.in_flight was not adjusted, so we could
|
||||
get contradictory state where the skb's recorded pcount was bigger
|
||||
than the tx.in_flight (the number of segments that were in_flight
|
||||
after sending the skb).
|
||||
|
||||
Normally having a SACKed skb with contradictory pcount/tx.in_flight
|
||||
would not matter. However, with SACK reneging, the SACKed bit is
|
||||
removed, and an skb once again becomes eligible for retransmitting,
|
||||
fragmenting, SACKing, etc. Packetdrill testing verified the following
|
||||
sequence is possible in a kernel that does not have this commit:
|
||||
|
||||
- skb N is SACKed
|
||||
- skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
|
||||
- tcp_shifted_skb() will increase the pcount of prev,
|
||||
but leave tx.in_flight as-is
|
||||
- so prev skb can have pcount > tx.in_flight
|
||||
- RTO, tcp_timeout_mark_lost(), detect reneg,
|
||||
remove "SACKed" bit, mark skb N as lost
|
||||
- find pcount of skb N is greater than its tx.in_flight
|
||||
|
||||
I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
|
||||
WARN_ON_ONCE(inflight_prev < 0)
|
||||
to fire in production machines using bbr2.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
|
||||
Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -1512,6 +1512,17 @@ static bool tcp_shifted_skb(struct sock
|
||||
WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
|
||||
tcp_skb_pcount_add(skb, -pcount);
|
||||
|
||||
+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
|
||||
+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
|
||||
+ "prev in_flight: %u skb in_flight: %u pcount: %u",
|
||||
+ TCP_SKB_CB(prev)->tx.in_flight,
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight,
|
||||
+ pcount))
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = 0;
|
||||
+ else
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
|
||||
+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
|
||||
+
|
||||
/* When we're adding to gso_segs == 1, gso_size will be zero,
|
||||
* in theory this shouldn't be necessary but as long as DSACK
|
||||
* code can come after this skb later on it's better to keep
|
@@ -1,97 +0,0 @@
|
||||
From 64570028a478a2249356345127bb3a3c75509d57 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Wed, 1 May 2019 20:16:25 -0400
|
||||
Subject: [PATCH 08/18] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in
|
||||
tcp_fragment()
|
||||
|
||||
When we fragment an skb that has already been sent, we need to update
|
||||
the tx.in_flight for the first skb in the resulting pair ("buff").
|
||||
|
||||
Because we were not updating the tx.in_flight, the tx.in_flight value
|
||||
was inconsistent with the pcount of the "buff" skb (tx.in_flight would
|
||||
be too high). That meant that if the "buff" skb was lost, then
|
||||
bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value
|
||||
that is too high. This could result in longer queues and higher packet
|
||||
loss.
|
||||
|
||||
Packetdrill testing verified that without this commit, when the second
|
||||
half of an skb is SACKed and then later the first half of that skb is
|
||||
marked lost, the calculated inflight_hi was incorrect.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874
|
||||
Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup]
|
||||
Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings
|
||||
Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 15 +++++++++++++++
|
||||
net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++-
|
||||
2 files changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1354,6 +1354,21 @@ static inline bool tcp_skb_sent_after(u6
|
||||
return t1 > t2 || (t1 == t2 && after(seq1, seq2));
|
||||
}
|
||||
|
||||
+/* If a retransmit failed due to local qdisc congestion or other local issues,
|
||||
+ * then we may have called tcp_set_skb_tso_segs() to increase the number of
|
||||
+ * segments in the skb without increasing the tx.in_flight. In all other cases,
|
||||
+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We
|
||||
+ * do not have the state to know whether a retransmit failed due to local qdisc
|
||||
+ * congestion or other local issues, so to avoid spurious warnings we consider
|
||||
+ * that any skb marked lost may have suffered that fate.
|
||||
+ */
|
||||
+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
|
||||
+ u32 skb_sacked_flags,
|
||||
+ u32 tx_in_flight)
|
||||
+{
|
||||
+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
|
||||
+}
|
||||
+
|
||||
/* These functions determine how the current flow behaves in respect of SACK
|
||||
* handling. SACK is negotiated with the peer, and therefore it can vary
|
||||
* between different flows.
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -1614,7 +1614,7 @@ int tcp_fragment(struct sock *sk, enum t
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct sk_buff *buff;
|
||||
- int old_factor;
|
||||
+ int old_factor, inflight_prev;
|
||||
long limit;
|
||||
u16 flags;
|
||||
int nlen;
|
||||
@@ -1689,6 +1689,30 @@ int tcp_fragment(struct sock *sk, enum t
|
||||
|
||||
if (diff)
|
||||
tcp_adjust_pcount(sk, skb, diff);
|
||||
+
|
||||
+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
|
||||
+ if (inflight_prev < 0) {
|
||||
+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
|
||||
+ old_factor,
|
||||
+ TCP_SKB_CB(skb)->sacked,
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight),
|
||||
+ "inconsistent: tx.in_flight: %u "
|
||||
+ "old_factor: %d mss: %u sacked: %u "
|
||||
+ "1st pcount: %d 2nd pcount: %d "
|
||||
+ "1st len: %u 2nd len: %u ",
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight, old_factor,
|
||||
+ mss_now, TCP_SKB_CB(skb)->sacked,
|
||||
+ tcp_skb_pcount(skb), tcp_skb_pcount(buff),
|
||||
+ skb->len, buff->len);
|
||||
+ inflight_prev = 0;
|
||||
+ }
|
||||
+ /* Set 1st tx.in_flight as if 1st were sent by itself: */
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
|
||||
+ tcp_skb_pcount(skb);
|
||||
+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
|
||||
+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
|
||||
+ tcp_skb_pcount(skb) +
|
||||
+ tcp_skb_pcount(buff);
|
||||
}
|
||||
|
||||
/* Link BUFF into the send queue. */
|
@@ -1,73 +0,0 @@
|
||||
From fb3f903921be91a91b577d3771e5d5c9e9fe3aa9 Mon Sep 17 00:00:00 2001
|
||||
From: Yousuk Seung <ysseung@google.com>
|
||||
Date: Wed, 23 May 2018 17:55:54 -0700
|
||||
Subject: [PATCH 09/18] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
|
||||
|
||||
Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
|
||||
congestion control module to receive CE events.
|
||||
|
||||
Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
|
||||
bit in opts flag to receive CE events but this may incur changes in ECN
|
||||
behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
|
||||
that allows congestion control modules to receive CE events
|
||||
independently of TCP_CONG_NEEDS_ECN.
|
||||
|
||||
Effort: net-tcp
|
||||
Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
|
||||
Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 14 +++++++++++++-
|
||||
net/ipv4/tcp_input.c | 4 ++--
|
||||
2 files changed, 15 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1190,7 +1190,11 @@ enum tcp_ca_ack_event_flags {
|
||||
#define TCP_CONG_NON_RESTRICTED BIT(0)
|
||||
/* Requires ECN/ECT set on all packets */
|
||||
#define TCP_CONG_NEEDS_ECN BIT(1)
|
||||
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
|
||||
+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
|
||||
+#define TCP_CONG_WANTS_CE_EVENTS BIT(2)
|
||||
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
|
||||
+ TCP_CONG_NEEDS_ECN | \
|
||||
+ TCP_CONG_WANTS_CE_EVENTS)
|
||||
|
||||
union tcp_cc_info;
|
||||
|
||||
@@ -1322,6 +1326,14 @@ static inline char *tcp_ca_get_name_by_k
|
||||
}
|
||||
#endif
|
||||
|
||||
+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
|
||||
+{
|
||||
+ const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
+
|
||||
+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
|
||||
+ TCP_CONG_WANTS_CE_EVENTS);
|
||||
+}
|
||||
+
|
||||
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
|
||||
{
|
||||
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -381,7 +381,7 @@ static void tcp_data_ecn_check(struct so
|
||||
tcp_enter_quickack_mode(sk, 2);
|
||||
break;
|
||||
case INET_ECN_CE:
|
||||
- if (tcp_ca_needs_ecn(sk))
|
||||
+ if (tcp_ca_wants_ce_events(sk))
|
||||
tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
|
||||
|
||||
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
|
||||
@@ -392,7 +392,7 @@ static void tcp_data_ecn_check(struct so
|
||||
tp->ecn_flags |= TCP_ECN_SEEN;
|
||||
break;
|
||||
default:
|
||||
- if (tcp_ca_needs_ecn(sk))
|
||||
+ if (tcp_ca_wants_ce_events(sk))
|
||||
tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
|
||||
tp->ecn_flags |= TCP_ECN_SEEN;
|
||||
break;
|
@@ -1,139 +0,0 @@
|
||||
From e0ac041e7e63d138d210fe875120447c11d2d4ba Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Fri, 27 Sep 2019 17:10:26 -0400
|
||||
Subject: [PATCH 10/18] net-tcp: re-generalize TSO sizing in TCP CC module API
|
||||
|
||||
Reorganize the API for CC modules so that the CC module once again
|
||||
gets complete control of the TSO sizing decision. This is how the API
|
||||
was set up around 2016 and the initial BBRv1 upstreaming. Later Eric
|
||||
Dumazet simplified it. But with wider testing it now seems that to
|
||||
avoid CPU regressions BBR needs to have a different TSO sizing
|
||||
function.
|
||||
|
||||
This is necessary to handle cases where there are many flows
|
||||
bottlenecked on the sender host's NIC, in which case BBR's pacing rate
|
||||
is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because
|
||||
BBR's pacing rate adapts to the low bandwidth share each flow sees. By
|
||||
contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very
|
||||
large cwnd, and thus large pacing rate and large TSO burst size.
|
||||
|
||||
Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 4 ++--
|
||||
net/ipv4/bpf_tcp_ca.c | 4 ++--
|
||||
net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++-----------
|
||||
net/ipv4/tcp_output.c | 11 +++++------
|
||||
4 files changed, 35 insertions(+), 21 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1256,8 +1256,8 @@ struct tcp_congestion_ops {
|
||||
/* hook for packet ack accounting (optional) */
|
||||
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
|
||||
|
||||
- /* override sysctl_tcp_min_tso_segs */
|
||||
- u32 (*min_tso_segs)(struct sock *sk);
|
||||
+ /* pick target number of segments per TSO/GSO skb (optional): */
|
||||
+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
|
||||
|
||||
/* react to a specific lost skb (optional) */
|
||||
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
|
||||
--- a/net/ipv4/bpf_tcp_ca.c
|
||||
+++ b/net/ipv4/bpf_tcp_ca.c
|
||||
@@ -280,7 +280,7 @@ static void bpf_tcp_ca_pkts_acked(struct
|
||||
{
|
||||
}
|
||||
|
||||
-static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
|
||||
+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -315,7 +315,7 @@ static struct tcp_congestion_ops __bpf_o
|
||||
.cwnd_event = bpf_tcp_ca_cwnd_event,
|
||||
.in_ack_event = bpf_tcp_ca_in_ack_event,
|
||||
.pkts_acked = bpf_tcp_ca_pkts_acked,
|
||||
- .min_tso_segs = bpf_tcp_ca_min_tso_segs,
|
||||
+ .tso_segs = bpf_tcp_ca_tso_segs,
|
||||
.cong_control = bpf_tcp_ca_cong_control,
|
||||
.undo_cwnd = bpf_tcp_ca_undo_cwnd,
|
||||
.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
|
||||
--- a/net/ipv4/tcp_bbr.c
|
||||
+++ b/net/ipv4/tcp_bbr.c
|
||||
@@ -301,20 +301,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs(
|
||||
return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
|
||||
}
|
||||
|
||||
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
|
||||
+ * a particular max gso size as a constraint.
|
||||
+ */
|
||||
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
|
||||
+ u32 gso_max_size)
|
||||
+{
|
||||
+ u32 segs;
|
||||
+ u64 bytes;
|
||||
+
|
||||
+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
|
||||
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
|
||||
+
|
||||
+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
|
||||
+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
|
||||
+ return segs;
|
||||
+}
|
||||
+
|
||||
+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
|
||||
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
+{
|
||||
+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
|
||||
+}
|
||||
+
|
||||
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
|
||||
static u32 bbr_tso_segs_goal(struct sock *sk)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
- u32 segs, bytes;
|
||||
-
|
||||
- /* Sort of tcp_tso_autosize() but ignoring
|
||||
- * driver provided sk_gso_max_size.
|
||||
- */
|
||||
- bytes = min_t(unsigned long,
|
||||
- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
|
||||
- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
|
||||
- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
|
||||
|
||||
- return min(segs, 0x7FU);
|
||||
+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE);
|
||||
}
|
||||
|
||||
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
|
||||
@@ -1150,7 +1165,7 @@ static struct tcp_congestion_ops tcp_bbr
|
||||
.undo_cwnd = bbr_undo_cwnd,
|
||||
.cwnd_event = bbr_cwnd_event,
|
||||
.ssthresh = bbr_ssthresh,
|
||||
- .min_tso_segs = bbr_min_tso_segs,
|
||||
+ .tso_segs = bbr_tso_segs,
|
||||
.get_info = bbr_get_info,
|
||||
.set_state = bbr_set_state,
|
||||
};
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -2069,13 +2069,12 @@ static u32 tcp_tso_autosize(const struct
|
||||
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
{
|
||||
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||
- u32 min_tso, tso_segs;
|
||||
+ u32 tso_segs;
|
||||
|
||||
- min_tso = ca_ops->min_tso_segs ?
|
||||
- ca_ops->min_tso_segs(sk) :
|
||||
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
-
|
||||
- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
|
||||
+ tso_segs = ca_ops->tso_segs ?
|
||||
+ ca_ops->tso_segs(sk, mss_now) :
|
||||
+ tcp_tso_autosize(sk, mss_now,
|
||||
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
|
||||
}
|
||||
|
@@ -1,73 +0,0 @@
|
||||
From 323945d608bbef5fd3a444f52442bf0154c4ef0b Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sat, 16 Nov 2019 13:16:25 -0500
|
||||
Subject: [PATCH 11/18] net-tcp: add fast_ack_mode=1: skip rwin check in
|
||||
tcp_fast_ack_mode__tcp_ack_snd_check()
|
||||
|
||||
Add logic for an optional TCP connection behavior, enabled with
|
||||
tp->fast_ack_mode = 1, which disables checking the receive window
|
||||
before sending an ack in __tcp_ack_snd_check(). If this behavior is
|
||||
enabled, the data receiver sends an ACK if the amount of data is >
|
||||
RCV.MSS. TCP congestion control modules can enable this bit if
|
||||
they want to generate ACKs quickly.
|
||||
|
||||
Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/tcp.h | 3 ++-
|
||||
net/ipv4/tcp.c | 1 +
|
||||
net/ipv4/tcp_cong.c | 1 +
|
||||
net/ipv4/tcp_input.c | 5 +++--
|
||||
4 files changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/tcp.h
|
||||
+++ b/include/linux/tcp.h
|
||||
@@ -248,7 +248,8 @@ struct tcp_sock {
|
||||
void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
|
||||
#endif
|
||||
u32 snd_ssthresh; /* Slow start size threshold */
|
||||
- u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
|
||||
+ u32 recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
|
||||
+ fast_ack_mode:1;/* ack ASAP if >1 rcv_mss received? */
|
||||
__cacheline_group_end(tcp_sock_read_rx);
|
||||
|
||||
/* TX read-write hotpath cache lines */
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -3411,6 +3411,7 @@ int tcp_disconnect(struct sock *sk, int
|
||||
tp->rx_opt.dsack = 0;
|
||||
tp->rx_opt.num_sacks = 0;
|
||||
tp->rcv_ooopack = 0;
|
||||
+ tp->fast_ack_mode = 0;
|
||||
|
||||
|
||||
/* Clean up fastopen related fields */
|
||||
--- a/net/ipv4/tcp_cong.c
|
||||
+++ b/net/ipv4/tcp_cong.c
|
||||
@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
tcp_sk(sk)->prior_ssthresh = 0;
|
||||
+ tcp_sk(sk)->fast_ack_mode = 0;
|
||||
if (icsk->icsk_ca_ops->init)
|
||||
icsk->icsk_ca_ops->init(sk);
|
||||
if (tcp_ca_needs_ecn(sk))
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -5811,13 +5811,14 @@ static void __tcp_ack_snd_check(struct s
|
||||
|
||||
/* More than one full frame received... */
|
||||
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
|
||||
+ (tp->fast_ack_mode == 1 ||
|
||||
/* ... and right edge of window advances far enough.
|
||||
* (tcp_recvmsg() will send ACK otherwise).
|
||||
* If application uses SO_RCVLOWAT, we want send ack now if
|
||||
* we have not received enough bytes to satisfy the condition.
|
||||
*/
|
||||
- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
|
||||
- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
|
||||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
|
||||
+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
|
||||
/* We ACK each frame or... */
|
||||
tcp_in_quickack_mode(sk) ||
|
||||
/* Protocol state mandates a one-time immediate ACK */
|
@@ -1,45 +0,0 @@
|
||||
From 30fc364b7834b5dce9434dfab4adb49d4924ac03 Mon Sep 17 00:00:00 2001
|
||||
From: Jianfeng Wang <jfwang@google.com>
|
||||
Date: Fri, 19 Jun 2020 17:33:45 +0000
|
||||
Subject: [PATCH 12/18] net-tcp_bbr: v2: record app-limited status of
|
||||
TLP-repaired flight
|
||||
|
||||
When sending a TLP retransmit, record whether the outstanding flight
|
||||
of data is application limited. This is important for congestion
|
||||
control modules that want to respond to losses repaired by TLP
|
||||
retransmits. This is important because the following scenarios convey
|
||||
very different information:
|
||||
(1) a packet loss with a small number of packets in flight;
|
||||
(2) a packet loss with the maximum amount of data in flight allowed
|
||||
by the CC module;
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/tcp.h | 3 ++-
|
||||
net/ipv4/tcp_output.c | 1 +
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/tcp.h
|
||||
+++ b/include/linux/tcp.h
|
||||
@@ -306,7 +306,8 @@ struct tcp_sock {
|
||||
*/
|
||||
struct tcp_options_received rx_opt;
|
||||
u8 nonagle : 4,/* Disable Nagle algorithm? */
|
||||
- rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
|
||||
+ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
|
||||
+ tlp_orig_data_app_limited:1; /* app-limited before TLP rtx? */
|
||||
__cacheline_group_end(tcp_sock_write_txrx);
|
||||
|
||||
/* RX read-write hotpath cache lines */
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -3013,6 +3013,7 @@ void tcp_send_loss_probe(struct sock *sk
|
||||
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
|
||||
goto rearm_timer;
|
||||
|
||||
+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
|
||||
if (__tcp_retransmit_skb(sk, skb, 1))
|
||||
goto rearm_timer;
|
||||
|
@@ -1,45 +0,0 @@
|
||||
From 0c90659bb5c4af502726a0c71cecc096ece1fc95 Mon Sep 17 00:00:00 2001
|
||||
From: Jianfeng Wang <jfwang@google.com>
|
||||
Date: Tue, 16 Jun 2020 17:41:19 +0000
|
||||
Subject: [PATCH 13/18] net-tcp_bbr: v2: inform CC module of losses repaired by
|
||||
TLP probe
|
||||
|
||||
Before this commit, when there is a packet loss that creates a sequence
|
||||
hole that is filled by a TLP loss probe, then tcp_process_tlp_ack()
|
||||
only informs the congestion control (CC) module via a back-to-back entry
|
||||
and exit of CWR. But some congestion control modules (e.g. BBR) do not
|
||||
respond to CWR events.
|
||||
|
||||
This commit adds a new CA event with which the core TCP stack notifies
|
||||
the CC module when a loss is repaired by a TLP. This will allow CC
|
||||
modules that do not use the CWR mechanism to have a custom handler for
|
||||
such TLP recoveries.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1168,6 +1168,7 @@ enum tcp_ca_event {
|
||||
CA_EVENT_LOSS, /* loss timeout */
|
||||
CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */
|
||||
CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */
|
||||
+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */
|
||||
};
|
||||
|
||||
/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3890,6 +3890,7 @@ static void tcp_process_tlp_ack(struct s
|
||||
/* ACK advances: there was a loss, so reduce cwnd. Reset
|
||||
* tlp_high_seq in tcp_init_cwnd_reduction()
|
||||
*/
|
||||
+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
|
||||
tcp_init_cwnd_reduction(sk);
|
||||
tcp_set_ca_state(sk, TCP_CA_CWR);
|
||||
tcp_end_cwnd_reduction(sk);
|
@@ -1,73 +0,0 @@
|
||||
From 0c9501f0d0743b7ab6958e064760b773b3bdf19b Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Mon, 21 Sep 2020 14:46:26 -0400
|
||||
Subject: [PATCH 14/18] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq
|
||||
into rate_sample
|
||||
|
||||
Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will
|
||||
export to the CC module the knowledge of whether the current ACK
|
||||
matched a TLP retransmit.
|
||||
|
||||
Note that when this bool is true, we cannot yet tell (in general) whether
|
||||
this ACK is for the original or the TLP retransmit.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 12 +++++++++---
|
||||
2 files changed, 10 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1232,6 +1232,7 @@ struct rate_sample {
|
||||
u32 last_end_seq; /* end_seq of most recently ACKed packet */
|
||||
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
||||
bool is_retrans; /* is sample from retransmission? */
|
||||
+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */
|
||||
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
||||
bool is_ece; /* did this ACK have ECN marked? */
|
||||
};
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3873,7 +3873,8 @@ static int tcp_replace_ts_recent(struct
|
||||
/* This routine deals with acks during a TLP episode and ends an episode by
|
||||
* resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
|
||||
*/
|
||||
-static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
|
||||
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
|
||||
+ struct rate_sample *rs)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
@@ -3901,6 +3902,11 @@ static void tcp_process_tlp_ack(struct s
|
||||
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
|
||||
/* Pure dupack: original and TLP probe arrived; no loss */
|
||||
tp->tlp_high_seq = 0;
|
||||
+ } else {
|
||||
+ /* This ACK matches a TLP retransmit. We cannot yet tell if
|
||||
+ * this ACK is for the original or the TLP retransmit.
|
||||
+ */
|
||||
+ rs->is_acking_tlp_retrans_seq = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4086,7 +4092,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
tcp_in_ack_event(sk, flag);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
- tcp_process_tlp_ack(sk, ack, flag);
|
||||
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
|
||||
|
||||
if (tcp_ack_is_dubious(sk, flag)) {
|
||||
if (!(flag & (FLAG_SND_UNA_ADVANCED |
|
||||
@@ -4131,7 +4137,7 @@ no_queue:
|
||||
tcp_ack_probe(sk);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
- tcp_process_tlp_ack(sk, ack, flag);
|
||||
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
|
||||
return 1;
|
||||
|
||||
old_ack:
|
@@ -1,112 +0,0 @@
|
||||
From c70e032f0effa66e1f67ae0a5ed65fac83c6c267 Mon Sep 17 00:00:00 2001
|
||||
From: David Morley <morleyd@google.com>
|
||||
Date: Fri, 14 Jul 2023 11:07:56 -0400
|
||||
Subject: [PATCH 15/18] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
|
||||
|
||||
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
|
||||
|
||||
This feature indicates that the given destination network is a
|
||||
low-latency ECN environment, meaning both that ECN CE marks are
|
||||
applied by the network using a low-latency marking threshold and also
|
||||
that TCP endpoints provide precise per-data-segment ECN feedback in
|
||||
ACKs (where the ACK ECE flag echoes the received CE status of all
|
||||
newly-acknowledged data segments). This feature indication can be used
|
||||
by congestion control algorithms to decide how to interpret ECN
|
||||
signals over the given destination network.
|
||||
|
||||
This feature is appropriate for datacenter-style ECN marking, such as
|
||||
the ECN marking approach expected by DCTCP or BBR congestion control
|
||||
modules.
|
||||
|
||||
Signed-off-by: David Morley <morleyd@google.com>
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Signed-off-by: Yuchung Cheng <ycheng@google.com>
|
||||
Tested-by: David Morley <morleyd@google.com>
|
||||
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 10 ++++++++++
|
||||
include/uapi/linux/rtnetlink.h | 4 +++-
|
||||
net/ipv4/tcp_minisocks.c | 2 ++
|
||||
net/ipv4/tcp_output.c | 6 ++++--
|
||||
4 files changed, 19 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -379,6 +379,7 @@ static inline void tcp_dec_quickack_mode
|
||||
#define TCP_ECN_DEMAND_CWR BIT(2)
|
||||
#define TCP_ECN_SEEN BIT(3)
|
||||
#define TCP_ECN_MODE_ACCECN BIT(4)
|
||||
+#define TCP_ECN_LOW BIT(5)
|
||||
|
||||
#define TCP_ECN_DISABLED 0
|
||||
#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
|
||||
@@ -840,6 +841,15 @@ static inline void tcp_fast_path_check(s
|
||||
|
||||
u32 tcp_delack_max(const struct sock *sk);
|
||||
|
||||
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
|
||||
+ const struct dst_entry *dst)
|
||||
+{
|
||||
+ struct tcp_sock *tp = tcp_sk(sk);
|
||||
+
|
||||
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
|
||||
+ tp->ecn_flags |= TCP_ECN_LOW;
|
||||
+}
|
||||
+
|
||||
/* Compute the actual rto_min value */
|
||||
static inline u32 tcp_rto_min(const struct sock *sk)
|
||||
{
|
||||
--- a/include/uapi/linux/rtnetlink.h
|
||||
+++ b/include/uapi/linux/rtnetlink.h
|
||||
@@ -517,12 +517,14 @@ enum {
|
||||
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
|
||||
#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
|
||||
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
|
||||
+#define RTAX_FEATURE_ECN_LOW (1 << 5)
|
||||
|
||||
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
|
||||
RTAX_FEATURE_SACK | \
|
||||
RTAX_FEATURE_TIMESTAMP | \
|
||||
RTAX_FEATURE_ALLFRAG | \
|
||||
- RTAX_FEATURE_TCP_USEC_TS)
|
||||
+ RTAX_FEATURE_TCP_USEC_TS | \
|
||||
+ RTAX_FEATURE_ECN_LOW)
|
||||
|
||||
struct rta_session {
|
||||
__u8 proto;
|
||||
--- a/net/ipv4/tcp_minisocks.c
|
||||
+++ b/net/ipv4/tcp_minisocks.c
|
||||
@@ -472,6 +472,8 @@ void tcp_ca_openreq_child(struct sock *s
|
||||
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
|
||||
bool ca_got_dst = false;
|
||||
|
||||
+ tcp_set_ecn_low_from_dst(sk, dst);
|
||||
+
|
||||
if (ca_key != TCP_CA_UNSPEC) {
|
||||
const struct tcp_congestion_ops *ca;
|
||||
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -339,10 +339,9 @@ static void tcp_ecn_send_syn(struct sock
|
||||
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
|
||||
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
|
||||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
|
||||
+ const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
|
||||
if (!use_ecn) {
|
||||
- const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
-
|
||||
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
|
||||
use_ecn = true;
|
||||
}
|
||||
@@ -354,6 +353,9 @@ static void tcp_ecn_send_syn(struct sock
|
||||
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
|
||||
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
||||
INET_ECN_xmit(sk);
|
||||
+
|
||||
+ if (dst)
|
||||
+ tcp_set_ecn_low_from_dst(sk, dst);
|
||||
}
|
||||
}
|
||||
|
@@ -1,59 +0,0 @@
|
||||
From 45615e9d62a6b0b688a284dc712c243295df9e65 Mon Sep 17 00:00:00 2001
|
||||
From: Adithya Abraham Philip <abrahamphilip@google.com>
|
||||
Date: Fri, 11 Jun 2021 21:56:10 +0000
|
||||
Subject: [PATCH 17/18] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT
|
||||
on retransmits
|
||||
|
||||
Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to
|
||||
indicate that retransmitted packets and pure ACKs must have the
|
||||
ECT bit set. This is necessary for BBR, which when using
|
||||
ECN expects ECT to be set even on retransmitted packets and ACKs.
|
||||
|
||||
Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use
|
||||
ECN but don't "need" it did not have a way to indicate that ECT should
|
||||
be set on retransmissions/ACKs.
|
||||
|
||||
Signed-off-by: Adithya Abraham Philip <abrahamphilip@google.com>
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_bbr.c | 3 +++
|
||||
net/ipv4/tcp_output.c | 3 ++-
|
||||
3 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -380,6 +380,7 @@ static inline void tcp_dec_quickack_mode
|
||||
#define TCP_ECN_SEEN BIT(3)
|
||||
#define TCP_ECN_MODE_ACCECN BIT(4)
|
||||
#define TCP_ECN_LOW BIT(5)
|
||||
+#define TCP_ECN_ECT_PERMANENT BIT(6)
|
||||
|
||||
#define TCP_ECN_DISABLED 0
|
||||
#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
|
||||
--- a/net/ipv4/tcp_bbr.c
|
||||
+++ b/net/ipv4/tcp_bbr.c
|
||||
@@ -2154,6 +2154,9 @@ __bpf_kfunc static void bbr_init(struct
|
||||
bbr->plb.pause_until = 0;
|
||||
|
||||
tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
|
||||
+
|
||||
+ if (bbr_can_use_ecn(sk))
|
||||
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
|
||||
}
|
||||
|
||||
/* BBR marks the current round trip as a loss round. */
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -393,7 +393,8 @@ static void tcp_ecn_send(struct sock *sk
|
||||
th->cwr = 1;
|
||||
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
|
||||
}
|
||||
- } else if (!tcp_ca_needs_ecn(sk)) {
|
||||
+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
|
||||
+ !tcp_ca_needs_ecn(sk)) {
|
||||
/* ACK or retransmitted segment: clear ECT|CE */
|
||||
INET_ECN_dontxmit(sk);
|
||||
}
|
@@ -1,38 +0,0 @@
|
||||
From 461bd4bd12039886127019682ba6e7f372d9fa0b Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 23 Jul 2023 23:25:34 -0400
|
||||
Subject: [PATCH 18/18] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options
|
||||
field
|
||||
|
||||
Analogous to other important ECN information, export TCPI_OPT_ECN_LOW
|
||||
in tcp_info tcpi_options field.
|
||||
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/uapi/linux/tcp.h | 1 +
|
||||
net/ipv4/tcp.c | 2 ++
|
||||
2 files changed, 3 insertions(+)
|
||||
|
||||
--- a/include/uapi/linux/tcp.h
|
||||
+++ b/include/uapi/linux/tcp.h
|
||||
@@ -184,6 +184,7 @@ enum tcp_fastopen_client_fail {
|
||||
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
|
||||
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
|
||||
#define TCPI_OPT_USEC_TS 64 /* usec timestamps */
|
||||
+#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN enabled at conn init */
|
||||
|
||||
/*
|
||||
* Sender's congestion state indicating normal or abnormal situations
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -4159,6 +4159,8 @@ void tcp_get_info(struct sock *sk, struc
|
||||
info->tcpi_options |= TCPI_OPT_ECN;
|
||||
if (tp->ecn_flags & TCP_ECN_SEEN)
|
||||
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
|
||||
+ if (tp->ecn_flags & TCP_ECN_LOW)
|
||||
+ info->tcpi_options |= TCPI_OPT_ECN_LOW;
|
||||
if (tp->syn_data_acked)
|
||||
info->tcpi_options |= TCPI_OPT_SYN_DATA;
|
||||
if (tp->tcp_usec_ts)
|
@@ -1,4 +1,4 @@
|
||||
From 1b7e9ad0803cef8cf087bb67a6e4c8d63a02405b Mon Sep 17 00:00:00 2001
|
||||
From 4941d9d4ebd5acbee74245ce22f32eca2e320d98 Mon Sep 17 00:00:00 2001
|
||||
From: "mfreemon@cloudflare.com" <mfreemon@cloudflare.com>
|
||||
Date: Tue, 1 Mar 2022 17:06:02 -0600
|
||||
Subject: [PATCH] tcp: Add a sysctl to skip tcp collapse processing when the
|
||||
@@ -41,7 +41,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/include/net/netns/ipv4.h
|
||||
+++ b/include/net/netns/ipv4.h
|
||||
@@ -230,6 +230,7 @@ struct netns_ipv4 {
|
||||
@@ -240,6 +240,7 @@ struct netns_ipv4 {
|
||||
|
||||
u8 sysctl_fib_notify_on_flag_change;
|
||||
u8 sysctl_tcp_syn_linear_timeouts;
|
||||
@@ -51,8 +51,8 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
u8 sysctl_udp_l3mdev_accept;
|
||||
--- a/include/trace/events/tcp.h
|
||||
+++ b/include/trace/events/tcp.h
|
||||
@@ -213,6 +213,13 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space
|
||||
TP_ARGS(sk)
|
||||
@@ -286,6 +286,13 @@ TRACE_EVENT(tcp_rcvbuf_grow,
|
||||
__entry->sock_cookie)
|
||||
);
|
||||
|
||||
+DEFINE_EVENT(tcp_event_sk, tcp_collapse_max_bytes_exceeded,
|
||||
@@ -83,7 +83,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
.maxlen = sizeof(u8),
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -5693,6 +5693,7 @@ static bool tcp_prune_ofo_queue(struct s
|
||||
@@ -5695,6 +5695,7 @@ static bool tcp_prune_ofo_queue(struct s
|
||||
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
@@ -91,7 +91,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
|
||||
|
||||
@@ -5704,6 +5705,39 @@ static int tcp_prune_queue(struct sock *
|
||||
@@ -5706,6 +5707,39 @@ static int tcp_prune_queue(struct sock *
|
||||
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
|
||||
return 0;
|
||||
|
||||
@@ -131,7 +131,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
tcp_collapse_ofo_queue(sk);
|
||||
if (!skb_queue_empty(&sk->sk_receive_queue))
|
||||
tcp_collapse(sk, &sk->sk_receive_queue, NULL,
|
||||
@@ -5722,6 +5756,8 @@ static int tcp_prune_queue(struct sock *
|
||||
@@ -5724,6 +5758,8 @@ static int tcp_prune_queue(struct sock *
|
||||
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
|
||||
return 0;
|
||||
|
||||
@@ -142,7 +142,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
* and hopefully then we'll have sufficient space.
|
||||
--- a/net/ipv4/tcp_ipv4.c
|
||||
+++ b/net/ipv4/tcp_ipv4.c
|
||||
@@ -3541,6 +3541,7 @@ static int __net_init tcp_sk_init(struct
|
||||
@@ -3542,6 +3542,7 @@ static int __net_init tcp_sk_init(struct
|
||||
|
||||
net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
|
||||
net->ipv4.sysctl_tcp_shrink_window = 0;
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From c98d1c0e1f4b119313eb5852ccbf14b748c5d4a4 Mon Sep 17 00:00:00 2001
|
||||
From 0d49c54082a9e3504feb517fc5e0095798bc5f14 Mon Sep 17 00:00:00 2001
|
||||
From: Mark Weiman <mark.weiman@markzz.com>
|
||||
Date: Sun, 12 Aug 2018 11:36:21 -0400
|
||||
Subject: [PATCH] PCI: Enable overrides for missing ACS capabilities
|
||||
@@ -55,7 +55,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -4691,6 +4691,15 @@
|
||||
@@ -4734,6 +4734,15 @@
|
||||
nomsi [MSI] If the PCI_MSI kernel config parameter is
|
||||
enabled, this kernel boot option can be used to
|
||||
disable the use of MSI interrupts system-wide.
|
||||
@@ -73,7 +73,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
should never be necessary.
|
||||
--- a/drivers/pci/quirks.c
|
||||
+++ b/drivers/pci/quirks.c
|
||||
@@ -3749,6 +3749,106 @@ static void quirk_no_bus_reset(struct pc
|
||||
@@ -3745,6 +3745,106 @@ static void quirk_no_bus_reset(struct pc
|
||||
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
|
||||
}
|
||||
|
||||
@@ -180,7 +180,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
/*
|
||||
* Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
|
||||
* prevented for those affected devices.
|
||||
@@ -5196,6 +5296,8 @@ static const struct pci_dev_acs_enabled
|
||||
@@ -5192,6 +5292,8 @@ static const struct pci_dev_acs_enabled
|
||||
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
|
||||
/* Wangxun nics */
|
||||
{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 14d0907ef9d3f5c708d6aff478e32c64cda3d488 Mon Sep 17 00:00:00 2001
|
||||
From 2ab619d115fa9b850f95844ab55745d27a637f43 Mon Sep 17 00:00:00 2001
|
||||
From: Andrey Smirnov <andrew.smirnov@gmail.com>
|
||||
Date: Sun, 27 Feb 2022 14:46:08 -0800
|
||||
Subject: [PATCH 1/6] extcon: Add driver for Steam Deck
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 3c5ff39975ce84f9d395349445a8742d58f16a20 Mon Sep 17 00:00:00 2001
|
||||
From 7da220466b677e5022380e42899e2521d24bb848 Mon Sep 17 00:00:00 2001
|
||||
From: Andrey Smirnov <andrew.smirnov@gmail.com>
|
||||
Date: Sat, 19 Feb 2022 16:09:45 -0800
|
||||
Subject: [PATCH 2/6] hwmon: Add driver for Steam Deck's EC sensors
|
||||
@@ -17,7 +17,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/drivers/hwmon/Kconfig
|
||||
+++ b/drivers/hwmon/Kconfig
|
||||
@@ -2110,6 +2110,17 @@ config SENSORS_SCH5636
|
||||
@@ -2118,6 +2118,17 @@ config SENSORS_SCH5636
|
||||
This driver can also be built as a module. If so, the module
|
||||
will be called sch5636.
|
||||
|
||||
@@ -37,7 +37,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
depends on I2C
|
||||
--- a/drivers/hwmon/Makefile
|
||||
+++ b/drivers/hwmon/Makefile
|
||||
@@ -213,6 +213,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47
|
||||
@@ -215,6 +215,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47
|
||||
obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
|
||||
obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
|
||||
obj-$(CONFIG_SENSORS_SPD5118) += spd5118.o
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 5e06cdcc7f6bf61b94a61f5b421573d2e12c0575 Mon Sep 17 00:00:00 2001
|
||||
From d4dabdc6f5c3cedfb4f5bb1601624a6b8721f8f6 Mon Sep 17 00:00:00 2001
|
||||
From: Andrey Smirnov <andrew.smirnov@gmail.com>
|
||||
Date: Sat, 15 Jul 2023 12:58:54 -0700
|
||||
Subject: [PATCH 3/6] hwmon: steamdeck-hwmon: Add support for max battery
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From a73fb7a13f21fdee3ffe8f900f32b7f3a325e60a Mon Sep 17 00:00:00 2001
|
||||
From 9ee8b0eb23529624f714ddf64177a98394f9d798 Mon Sep 17 00:00:00 2001
|
||||
From: Andrey Smirnov <andrew.smirnov@gmail.com>
|
||||
Date: Sun, 27 Feb 2022 12:58:05 -0800
|
||||
Subject: [PATCH 4/6] leds: steamdeck: Add support for Steam Deck LED
|
||||
@@ -15,7 +15,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/drivers/leds/Kconfig
|
||||
+++ b/drivers/leds/Kconfig
|
||||
@@ -1013,6 +1013,13 @@ config LEDS_ACER_A500
|
||||
@@ -1020,6 +1020,13 @@ config LEDS_ACER_A500
|
||||
This option enables support for the Power Button LED of
|
||||
Acer Iconia Tab A500.
|
||||
|
||||
@@ -31,7 +31,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
comment "Flash and Torch LED drivers"
|
||||
--- a/drivers/leds/Makefile
|
||||
+++ b/drivers/leds/Makefile
|
||||
@@ -85,6 +85,7 @@ obj-$(CONFIG_LEDS_QNAP_MCU) += leds-qna
|
||||
@@ -86,6 +86,7 @@ obj-$(CONFIG_LEDS_QNAP_MCU) += leds-qna
|
||||
obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o
|
||||
obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o
|
||||
obj-$(CONFIG_LEDS_ST1202) += leds-st1202.o
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From eaf78d7b957552deba7222ca6bd1dae28bdd420b Mon Sep 17 00:00:00 2001
|
||||
From 3a053cd6802ab65b8119140afda40921de19361d Mon Sep 17 00:00:00 2001
|
||||
From: Andrey Smirnov <andrew.smirnov@gmail.com>
|
||||
Date: Sat, 19 Feb 2022 16:08:36 -0800
|
||||
Subject: [PATCH 5/6] mfd: Add MFD core driver for Steam Deck
|
||||
@@ -19,7 +19,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/drivers/mfd/Kconfig
|
||||
+++ b/drivers/mfd/Kconfig
|
||||
@@ -2422,5 +2422,16 @@ config MFD_UPBOARD_FPGA
|
||||
@@ -2463,5 +2463,16 @@ config MFD_UPBOARD_FPGA
|
||||
To compile this driver as a module, choose M here: the module will be
|
||||
called upboard-fpga.
|
||||
|
||||
@@ -38,7 +38,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
endif
|
||||
--- a/drivers/mfd/Makefile
|
||||
+++ b/drivers/mfd/Makefile
|
||||
@@ -290,3 +290,5 @@ obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o
|
||||
@@ -294,3 +294,5 @@ obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o
|
||||
obj-$(CONFIG_MFD_RSMU_SPI) += rsmu_spi.o rsmu_core.o
|
||||
|
||||
obj-$(CONFIG_MFD_UPBOARD_FPGA) += upboard-fpga.o
|
||||
|
@@ -1,4 +1,4 @@
|
||||
From 1755d1224560a5d3379489cf83efefdf6fd7d93c Mon Sep 17 00:00:00 2001
|
||||
From 97d0aafc2f115cc2df7fd486658659d28930e66c Mon Sep 17 00:00:00 2001
|
||||
From: Andrey Smirnov <andrew.smirnov@gmail.com>
|
||||
Date: Sun, 24 Sep 2023 15:02:33 -0700
|
||||
Subject: [PATCH 6/6] mfd: steamdeck: Expose controller board power in sysfs
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From 7479efa37dfb05263e0984ca1e1a3da22fa62414 Mon Sep 17 00:00:00 2001
|
||||
From 8f188bee3819f45c66b00ffd8a8f3b65247efcdb Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Thu, 28 Nov 2024 22:55:27 +0000
|
||||
Subject: [PATCH 03/19] kbuild: Re-add .config file required to sign external
|
||||
Subject: [PATCH 05/20] kbuild: Re-add .config file required to sign external
|
||||
modules
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From 3cd805916cf93d70ef73a006ed54c737c1bb44ca Mon Sep 17 00:00:00 2001
|
||||
From dc18410893372cca1aad9e912fd119f462b2d89a Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Thu, 11 May 2023 19:41:41 +0000
|
||||
Subject: [PATCH 05/19] XANMOD: fair: Set scheduler tunable latencies to
|
||||
Subject: [PATCH 06/20] XANMOD: fair: Set scheduler tunable latencies to
|
||||
unscaled
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,76 +0,0 @@
|
||||
From 0d678f81894ace50347c6223255b8263161299fe Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Sat, 31 Aug 2024 16:57:41 +0000
|
||||
Subject: [PATCH 04/19] kbuild: Remove GCC minimal function alignment
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
Makefile | 7 -------
|
||||
arch/Kconfig | 12 ------------
|
||||
include/linux/compiler_types.h | 10 +++++-----
|
||||
3 files changed, 5 insertions(+), 24 deletions(-)
|
||||
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -1058,15 +1058,8 @@ export CC_FLAGS_FPU
|
||||
export CC_FLAGS_NO_FPU
|
||||
|
||||
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
|
||||
-# Set the minimal function alignment. Use the newer GCC option
|
||||
-# -fmin-function-alignment if it is available, or fall back to -falign-funtions.
|
||||
-# See also CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT.
|
||||
-ifdef CONFIG_CC_HAS_MIN_FUNCTION_ALIGNMENT
|
||||
-KBUILD_CFLAGS += -fmin-function-alignment=$(CONFIG_FUNCTION_ALIGNMENT)
|
||||
-else
|
||||
KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
|
||||
endif
|
||||
-endif
|
||||
|
||||
# arch Makefile may override CC so keep this after arch Makefile is included
|
||||
NOSTDINC_FLAGS += -nostdinc
|
||||
--- a/arch/Kconfig
|
||||
+++ b/arch/Kconfig
|
||||
@@ -1734,18 +1734,6 @@ config FUNCTION_ALIGNMENT
|
||||
default 4 if FUNCTION_ALIGNMENT_4B
|
||||
default 0
|
||||
|
||||
-config CC_HAS_MIN_FUNCTION_ALIGNMENT
|
||||
- # Detect availability of the GCC option -fmin-function-alignment which
|
||||
- # guarantees minimal alignment for all functions, unlike
|
||||
- # -falign-functions which the compiler ignores for cold functions.
|
||||
- def_bool $(cc-option, -fmin-function-alignment=8)
|
||||
-
|
||||
-config CC_HAS_SANE_FUNCTION_ALIGNMENT
|
||||
- # Set if the guaranteed alignment with -fmin-function-alignment is
|
||||
- # available or extra care is required in the kernel. Clang provides
|
||||
- # strict alignment always, even with -falign-functions.
|
||||
- def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG
|
||||
-
|
||||
config ARCH_NEED_CMPXCHG_1_EMU
|
||||
bool
|
||||
|
||||
--- a/include/linux/compiler_types.h
|
||||
+++ b/include/linux/compiler_types.h
|
||||
@@ -99,17 +99,17 @@ static inline void __chk_io_ptr(const vo
|
||||
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
|
||||
*
|
||||
* When -falign-functions=N is in use, we must avoid the cold attribute as
|
||||
- * GCC drops the alignment for cold functions. Worse, GCC can implicitly mark
|
||||
- * callees of cold functions as cold themselves, so it's not sufficient to add
|
||||
- * __function_aligned here as that will not ensure that callees are correctly
|
||||
- * aligned.
|
||||
+ * contemporary versions of GCC drop the alignment for cold functions. Worse,
|
||||
+ * GCC can implicitly mark callees of cold functions as cold themselves, so
|
||||
+ * it's not sufficient to add __function_aligned here as that will not ensure
|
||||
+ * that callees are correctly aligned.
|
||||
*
|
||||
* See:
|
||||
*
|
||||
* https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N
|
||||
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9
|
||||
*/
|
||||
-#if defined(CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT) || (CONFIG_FUNCTION_ALIGNMENT == 0)
|
||||
+#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0)
|
||||
#define __cold __attribute__((__cold__))
|
||||
#else
|
||||
#define __cold
|
@@ -1,7 +1,7 @@
|
||||
From fa6afaf41316657a46bc70c9e942051e15e837fd Mon Sep 17 00:00:00 2001
|
||||
From 3ad875b148a5b6cd734767979c0e99abde0cd27d Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Sun, 15 Sep 2024 23:03:38 +0000
|
||||
Subject: [PATCH 06/19] XANMOD: sched: Add yield_type sysctl to reduce or
|
||||
Subject: [PATCH 07/20] XANMOD: sched: Add yield_type sysctl to reduce or
|
||||
disable sched_yield
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
@@ -12,7 +12,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/kernel/sched/syscalls.c
|
||||
+++ b/kernel/sched/syscalls.c
|
||||
@@ -1350,15 +1350,29 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
|
||||
@@ -1351,15 +1351,29 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
rq_unlock_irq(rq, &rf);
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -80,6 +80,8 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals);
|
||||
@@ -73,6 +73,8 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals);
|
||||
|
||||
#if defined(CONFIG_SYSCTL)
|
||||
|
||||
@@ -54,10 +54,10 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
/* Constants used for minimum and maximum */
|
||||
static const int ngroups_max = NGROUPS_MAX;
|
||||
static const int cap_last_cap = CAP_LAST_CAP;
|
||||
@@ -1608,6 +1610,15 @@ static const struct ctl_table kern_table
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
@@ -1585,6 +1587,15 @@ int proc_do_static_key(const struct ctl_
|
||||
}
|
||||
|
||||
static const struct ctl_table kern_table[] = {
|
||||
+ {
|
||||
+ .procname = "yield_type",
|
||||
+ .data = &sysctl_sched_yield_type,
|
||||
@@ -67,6 +67,6 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
+ .extra1 = SYSCTL_ZERO,
|
||||
+ .extra2 = SYSCTL_TWO,
|
||||
+ },
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
#ifdef CONFIG_USER_NS
|
||||
{
|
||||
.procname = "tainted",
|
||||
.procname = "unprivileged_userns_clone",
|
@@ -1,7 +1,7 @@
|
||||
From 5dc5d7a3a1c25cd5d7c2079bbe56ff7c0066c76a Mon Sep 17 00:00:00 2001
|
||||
From db31ff982d647321b426693b3779855926a831db Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Wed, 11 May 2022 18:56:51 +0000
|
||||
Subject: [PATCH 07/19] XANMOD: block/mq-deadline: Increase write priority to
|
||||
Subject: [PATCH 08/20] XANMOD: block/mq-deadline: Increase write priority to
|
||||
improve responsiveness
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,7 +1,7 @@
|
||||
From 83f38053e977907d085d7f27a24f1b2844a03f1c Mon Sep 17 00:00:00 2001
|
||||
From 5a6a3325e1d23b8981e9ed747276a2b6d7d316a2 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Thu, 6 Jan 2022 16:59:01 +0000
|
||||
Subject: [PATCH 08/19] XANMOD: block/mq-deadline: Disable front_merges by
|
||||
Subject: [PATCH 09/20] XANMOD: block/mq-deadline: Disable front_merges by
|
||||
default
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,7 +1,7 @@
|
||||
From 6c75a84f9f89c848e76650cc66672246fa62843f Mon Sep 17 00:00:00 2001
|
||||
From 6412c4a256e906ab8bbcddc1be56910613f3c375 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 16 Sep 2024 15:36:01 +0000
|
||||
Subject: [PATCH 09/19] XANMOD: block: Set rq_affinity to force complete I/O
|
||||
Subject: [PATCH 10/20] XANMOD: block: Set rq_affinity to force complete I/O
|
||||
requests on same CPU
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/include/linux/blkdev.h
|
||||
+++ b/include/linux/blkdev.h
|
||||
@@ -652,7 +652,8 @@ enum {
|
||||
@@ -655,7 +655,8 @@ enum {
|
||||
QUEUE_FLAG_MAX
|
||||
};
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From 287c275293025e956f2144e55de8cc51eec0811b Mon Sep 17 00:00:00 2001
|
||||
From a16d2a485d2dd60d91fd166c0cd09835d57ce5dc Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 15 Jul 2024 04:50:34 +0000
|
||||
Subject: [PATCH 10/19] XANMOD: blk-wbt: Set wbt_default_latency_nsec() to
|
||||
Subject: [PATCH 11/20] XANMOD: blk-wbt: Set wbt_default_latency_nsec() to
|
||||
2msec
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/block/blk-wbt.c
|
||||
+++ b/block/blk-wbt.c
|
||||
@@ -727,14 +727,8 @@ EXPORT_SYMBOL_GPL(wbt_enable_default);
|
||||
@@ -730,14 +730,8 @@ EXPORT_SYMBOL_GPL(wbt_enable_default);
|
||||
|
||||
u64 wbt_default_latency_nsec(struct request_queue *q)
|
||||
{
|
@@ -1,7 +1,7 @@
|
||||
From 25c1d0ad74a27ac80dbda2840eba4fe53046ed55 Mon Sep 17 00:00:00 2001
|
||||
From 9c77ecedb9f94f5140e6dcc15d9ab87adcc2d0ac Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 29 Jan 2018 17:26:15 +0000
|
||||
Subject: [PATCH 11/19] XANMOD: kconfig: add 500Hz timer interrupt kernel
|
||||
Subject: [PATCH 12/20] XANMOD: kconfig: add 500Hz timer interrupt kernel
|
||||
config option
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,8 +1,8 @@
|
||||
From 185191cd2a98629f35cb5cd6c0116ceb33635dd8 Mon Sep 17 00:00:00 2001
|
||||
From 29bff014ffe0d6179dadb9d7882632c57d93822c Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 29 Jan 2018 16:59:22 +0000
|
||||
Subject: [PATCH 12/19] XANMOD: dcache: cache_pressure = 50 decreases the rate
|
||||
at which VFS caches are reclaimed
|
||||
Date: Mon, 28 Jul 2025 17:20:16 +0000
|
||||
Subject: [PATCH 13/20] XANMOD: vfs: Decrease rate at which vfs caches are
|
||||
reclaimed
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
@@ -17,6 +17,6 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
*/
|
||||
-static int sysctl_vfs_cache_pressure __read_mostly = 100;
|
||||
+static int sysctl_vfs_cache_pressure __read_mostly = 50;
|
||||
static int sysctl_vfs_cache_pressure_denom __read_mostly = 100;
|
||||
|
||||
unsigned long vfs_pressure_ratio(unsigned long val)
|
||||
{
|
@@ -1,7 +1,7 @@
|
||||
From 653701587608c8113dd4c941526104cea83d697e Mon Sep 17 00:00:00 2001
|
||||
From 5de3ee4e997355118cbdc6249afebdf3bef3129a Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Sun, 28 Apr 2024 09:06:54 +0000
|
||||
Subject: [PATCH 13/19] XANMOD: mm: Raise max_map_count default value
|
||||
Subject: [PATCH 14/20] XANMOD: mm: Raise max_map_count default value
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
@@ -11,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/Documentation/admin-guide/sysctl/vm.rst
|
||||
+++ b/Documentation/admin-guide/sysctl/vm.rst
|
||||
@@ -470,7 +470,7 @@ While most applications need less than a
|
||||
@@ -477,7 +477,7 @@ While most applications need less than a
|
||||
programs, particularly malloc debuggers, may consume lots of them,
|
||||
e.g., up to one or two maps per allocation.
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From 7ede458d310744257808696e599b8e9b11333dd0 Mon Sep 17 00:00:00 2001
|
||||
From 5dc99919f35bd759762b4ebaee55cead67f80680 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Fri, 30 May 2025 19:58:58 +0000
|
||||
Subject: [PATCH 14/19] XANMOD: mm/vmscan: Reduce amount of swapping
|
||||
Subject: [PATCH 15/20] XANMOD: mm/vmscan: Reduce amount of swapping
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
@@ -1,7 +1,7 @@
|
||||
From d5b37aa9862773c0cdf95676cc15d97416311ba2 Mon Sep 17 00:00:00 2001
|
||||
From 80ef8da328db02245f16e1873466a8cec86b4846 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Wed, 15 Jun 2022 17:07:29 +0000
|
||||
Subject: [PATCH 15/19] XANMOD: sched/autogroup: Add kernel parameter and
|
||||
Subject: [PATCH 16/20] XANMOD: sched/autogroup: Add kernel parameter and
|
||||
config option to enable/disable autogroup feature by default
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
@@ -13,7 +13,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -507,6 +507,10 @@
|
||||
@@ -510,6 +510,10 @@
|
||||
Format: <int> (must be >=0)
|
||||
Default: 64
|
||||
|
||||
@@ -24,7 +24,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
bau= [X86_UV] Enable the BAU on SGI UV. The default
|
||||
behavior is to disable the BAU (i.e. bau=0).
|
||||
Format: { "0" | "1" }
|
||||
@@ -4086,8 +4090,6 @@
|
||||
@@ -4129,8 +4133,6 @@
|
||||
|
||||
noapictimer [APIC,X86] Don't set up the APIC timer
|
||||
|
||||
@@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
no_console_suspend
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1374,6 +1374,18 @@ config SCHED_AUTOGROUP
|
||||
@@ -1393,6 +1393,18 @@ config SCHED_AUTOGROUP
|
||||
desktop applications. Task group autogeneration is currently based
|
||||
upon task session.
|
||||
|
@@ -1,7 +1,7 @@
|
||||
From 475f127d322b1fe12a8f486e779ec60cc03220bc Mon Sep 17 00:00:00 2001
|
||||
From df6c5af7be4fdfd1beadc3d9ef5b573717b049e1 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Tue, 31 Mar 2020 13:32:08 -0300
|
||||
Subject: [PATCH 16/19] XANMOD: cpufreq: tunes ondemand and conservative
|
||||
Subject: [PATCH 17/20] XANMOD: cpufreq: tunes ondemand and conservative
|
||||
governor for performance
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,7 +1,7 @@
|
||||
From ecbc96ba0c56aa4c94c1a4bcb3184cc79fad1d3d Mon Sep 17 00:00:00 2001
|
||||
From b203a73ce3d1b097117180903b33f37ae83e4a94 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 16 Sep 2024 08:09:56 +0000
|
||||
Subject: [PATCH 17/19] XANMOD: lib/kconfig.debug: disable default
|
||||
Subject: [PATCH 18/20] XANMOD: lib/kconfig.debug: disable default
|
||||
SYMBOLIC_ERRNAME and DEBUG_BUGVERBOSE
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,7 +1,7 @@
|
||||
From 3606c4614583729c8471c98d171d42ff895b38c4 Mon Sep 17 00:00:00 2001
|
||||
From 9a67710027ff742f3dfd21e32318d056b0c490f3 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Sun, 29 May 2022 00:57:40 +0000
|
||||
Subject: [PATCH 18/19] XANMOD: scripts/setlocalversion: remove "+" tag for git
|
||||
Subject: [PATCH 19/20] XANMOD: scripts/setlocalversion: remove '+' tag for git
|
||||
repo short version
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
@@ -1,7 +1,7 @@
|
||||
From 927fa9cd3d3b0a6f65c44d492c263d0669ec4b7e Mon Sep 17 00:00:00 2001
|
||||
From d034a044b32f6b646b9b5f798d1b707b2ba02dfc Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 24 Apr 2023 04:50:34 +0000
|
||||
Subject: [PATCH 19/19] XANMOD: scripts/setlocalversion: Move localversion*
|
||||
Subject: [PATCH 20/20] XANMOD: scripts/setlocalversion: Move localversion*
|
||||
files to the end
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
Reference in New Issue
Block a user