add 3rd party/custom patches
3rd-party patches (in alphabetical order): - bbr3 - ntsync5 - openwrt - pf-kernel - xanmod - zen. No configuration changes for now.
This commit is contained in:
parent
8082dfeaca
commit
8cbaf1dea2
61
debian/bin/genpatch-pfkernel
vendored
Executable file
61
debian/bin/genpatch-pfkernel
vendored
Executable file
@ -0,0 +1,61 @@
|
||||
#!/bin/sh
|
||||
set -ef
|
||||
|
||||
export GIT_OPTIONAL_LOCKS=0
|
||||
|
||||
w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w"
|
||||
|
||||
dst='debian/patches/pf'
|
||||
src='../linux-extras'
|
||||
branches='amd-pstate amd-rapl cpuidle crypto fixes ksm zstd'
|
||||
|
||||
[ -d "${dst}" ]
|
||||
|
||||
kver=
|
||||
if [ -n "$1" ] ; then
|
||||
kver="$1"
|
||||
else
|
||||
kver=$(dpkg-parsechangelog --show-field=Version | sed -E 's/^[0-9]+://;s/-[^-]*$//' | cut -d. -f1-2)
|
||||
fi
|
||||
from="upstream/linux-${kver}.y"
|
||||
|
||||
t=$(mktemp -d) ; : "${t:?}"
|
||||
|
||||
cp -ar "${src}" "$t/"
|
||||
cd "$t/${src##*/}"
|
||||
|
||||
git config advice.skippedCherryPicks false
|
||||
|
||||
for b in ${branches} ; do
|
||||
ref="pf/$b-${kver}"
|
||||
r="tmp-rebase-$b"
|
||||
|
||||
git switch --detach "${ref}"
|
||||
git switch -C "$r"
|
||||
|
||||
if git rebase "${from}" ; then
|
||||
[ -d "$w/${dst}/$b/" ] || mkdir -p "$w/${dst}/$b"
|
||||
|
||||
set +e
|
||||
env -C "$w" git ls-files -z | grep -zF "${dst}/$b/" | grep -zFv '/.' | env -C "$w" -u GIT_OPTIONAL_LOCKS xargs -r -0 git rm -f
|
||||
find "$w/${dst}/$b/" -name '*.patch' -type f -exec rm -f {} +
|
||||
set -e
|
||||
|
||||
git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${from}..$r"
|
||||
else
|
||||
echo >&2
|
||||
git rebase --abort
|
||||
echo >&2
|
||||
fi
|
||||
|
||||
git switch -q --detach "${ref}"
|
||||
git branch -D "$r"
|
||||
echo >&2
|
||||
done
|
||||
|
||||
cd "$w" ; rm -rf "$t"
|
||||
|
||||
echo >&2
|
||||
echo 'put in debian/patches/series' >&2
|
||||
echo >&2
|
||||
find "${dst}/" -type f -name '*.patch' | sed -E 's#^debian/patches/##' | sort -V
|
52
debian/patches/krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch
vendored
Normal file
52
debian/patches/krd/0001-Revert-objtool-dont-fail-the-kernel-build-on-fatal-errors.patch
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
this reverts the following commit:
|
||||
|
||||
From: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Date: Thu, 14 Jan 2021 16:32:42 -0600
|
||||
Subject: objtool: Don't fail the kernel build on fatal errors
|
||||
|
||||
[ Upstream commit 655cf86548a3938538642a6df27dd359e13c86bd ]
|
||||
|
||||
This is basically a revert of commit 644592d32837 ("objtool: Fail the
|
||||
kernel build on fatal errors").
|
||||
|
||||
That change turned out to be more trouble than it's worth. Failing the
|
||||
build is an extreme measure which sometimes gets too much attention and
|
||||
blocks CI build testing.
|
||||
|
||||
These fatal-type warnings aren't yet as rare as we'd hope, due to the
|
||||
ever-increasing matrix of supported toolchains/plugins and their
|
||||
fast-changing nature as of late.
|
||||
|
||||
Also, there are more people (and bots) looking for objtool warnings than
|
||||
ever before, so even non-fatal warnings aren't likely to be ignored for
|
||||
long.
|
||||
|
||||
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
|
||||
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
|
||||
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
|
||||
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
|
||||
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
Signed-off-by: Sasha Levin <sashal@kernel.org>
|
||||
|
||||
--- a/tools/objtool/check.c
|
||||
+++ b/tools/objtool/check.c
|
||||
@@ -4872,10 +4872,14 @@ int check(struct objtool_file *file)
|
||||
}
|
||||
|
||||
out:
|
||||
- /*
|
||||
- * For now, don't fail the kernel build on fatal warnings. These
|
||||
- * errors are still fairly common due to the growing matrix of
|
||||
- * supported toolchains and their recent pace of change.
|
||||
- */
|
||||
+ if (ret < 0) {
|
||||
+ /*
|
||||
+ * Fatal error. The binary is corrupt or otherwise broken in
|
||||
+ * some way, or objtool itself is broken. Fail the kernel
|
||||
+ * build.
|
||||
+ */
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
11
debian/patches/krd/0002-established-timeout.patch
vendored
Normal file
11
debian/patches/krd/0002-established-timeout.patch
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
--- a/net/netfilter/nf_conntrack_proto_tcp.c
|
||||
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
|
||||
@@ -61,7 +61,7 @@ enum nf_ct_tcp_action {
|
||||
static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
|
||||
[TCP_CONNTRACK_SYN_SENT] = 2 MINS,
|
||||
[TCP_CONNTRACK_SYN_RECV] = 60 SECS,
|
||||
- [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
|
||||
+ [TCP_CONNTRACK_ESTABLISHED] = 128 MINS,
|
||||
[TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
|
||||
[TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
|
||||
[TCP_CONNTRACK_LAST_ACK] = 30 SECS,
|
11
debian/patches/krd/0003-local-ports.patch
vendored
Normal file
11
debian/patches/krd/0003-local-ports.patch
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
--- a/net/ipv4/af_inet.c
|
||||
+++ b/net/ipv4/af_inet.c
|
||||
@@ -1802,7 +1802,7 @@ static __net_init int inet_init_net(stru
|
||||
/*
|
||||
* Set defaults for local port range
|
||||
*/
|
||||
- net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u;
|
||||
+ net->ipv4.ip_local_ports.range = 65533u << 16 | 49152u;
|
||||
|
||||
seqlock_init(&net->ipv4.ping_group_range.lock);
|
||||
/*
|
37
debian/patches/krd/0004-bridge-group_fwd_mask.patch
vendored
Normal file
37
debian/patches/krd/0004-bridge-group_fwd_mask.patch
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
--- a/net/bridge/br_input.c
|
||||
+++ b/net/bridge/br_input.c
|
||||
@@ -374,7 +374,11 @@ static rx_handler_result_t br_handle_fra
|
||||
return RX_HANDLER_PASS;
|
||||
|
||||
case 0x01: /* IEEE MAC (Pause) */
|
||||
- goto drop;
|
||||
+ fwd_mask |= p->br->group_fwd_mask;
|
||||
+ if (fwd_mask & (1u << dest[5]))
|
||||
+ goto forward;
|
||||
+ else
|
||||
+ goto drop;
|
||||
|
||||
case 0x0E: /* 802.1AB LLDP */
|
||||
fwd_mask |= p->br->group_fwd_mask;
|
||||
--- a/net/bridge/br_netlink.c
|
||||
+++ b/net/bridge/br_netlink.c
|
||||
@@ -1365,8 +1365,6 @@ static int br_changelink(struct net_devi
|
||||
if (data[IFLA_BR_GROUP_FWD_MASK]) {
|
||||
u16 fwd_mask = nla_get_u16(data[IFLA_BR_GROUP_FWD_MASK]);
|
||||
|
||||
- if (fwd_mask & BR_GROUPFWD_RESTRICTED)
|
||||
- return -EINVAL;
|
||||
br->group_fwd_mask = fwd_mask;
|
||||
}
|
||||
|
||||
--- a/net/bridge/br_sysfs_br.c
|
||||
+++ b/net/bridge/br_sysfs_br.c
|
||||
@@ -179,8 +179,6 @@ static ssize_t group_fwd_mask_show(struc
|
||||
static int set_group_fwd_mask(struct net_bridge *br, unsigned long val,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
- if (val & BR_GROUPFWD_RESTRICTED)
|
||||
- return -EINVAL;
|
||||
|
||||
br->group_fwd_mask = val;
|
||||
|
52
debian/patches/misc-bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch
vendored
Normal file
52
debian/patches/misc-bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
From ce1cd7869a208112a8728d1fe9e373f78a2e4a6e Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Tue, 11 Jun 2019 12:26:55 -0400
|
||||
Subject: [PATCH 01/19] net-tcp_bbr: broaden app-limited rate sample detection
|
||||
|
||||
This commit is a bug fix for the Linux TCP app-limited
|
||||
(application-limited) logic that is used for collecting rate
|
||||
(bandwidth) samples.
|
||||
|
||||
Previously the app-limited logic only looked for "bubbles" of
|
||||
silence in between application writes, by checking at the start
|
||||
of each sendmsg. But "bubbles" of silence can also happen before
|
||||
retransmits: e.g. bubbles can happen between an application write
|
||||
and a retransmit, or between two retransmits.
|
||||
|
||||
Retransmits are triggered by ACKs or timers. So this commit checks
|
||||
for bubbles of app-limited silence upon ACKs or timers.
|
||||
|
||||
Why does this commit check for app-limited state at the start of
|
||||
ACKs and timer handling? Because at that point we know whether
|
||||
inflight was fully using the cwnd. During processing the ACK or
|
||||
timer event we often change the cwnd; after changing the cwnd we
|
||||
can't know whether inflight was fully using the old cwnd.
|
||||
|
||||
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
|
||||
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
net/ipv4/tcp_timer.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3961,6 +3961,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
|
||||
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
|
||||
rs.prior_in_flight = tcp_packets_in_flight(tp);
|
||||
+ tcp_rate_check_app_limited(sk);
|
||||
|
||||
/* ts_recent update must be made after we are sure that the packet
|
||||
* is in window.
|
||||
--- a/net/ipv4/tcp_timer.c
|
||||
+++ b/net/ipv4/tcp_timer.c
|
||||
@@ -689,6 +689,7 @@ void tcp_write_timer_handler(struct sock
|
||||
return;
|
||||
}
|
||||
|
||||
+ tcp_rate_check_app_limited(sk);
|
||||
tcp_mstamp_refresh(tcp_sk(sk));
|
||||
event = icsk->icsk_pending;
|
||||
|
74
debian/patches/misc-bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch
vendored
Normal file
74
debian/patches/misc-bbr3/0002-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
From b32715fbe2ab96d1060ec37bb9c03feedf366494 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 24 Jun 2018 21:55:59 -0400
|
||||
Subject: [PATCH 02/19] net-tcp_bbr: v2: shrink delivered_mstamp,
|
||||
first_tx_mstamp to u32 to free up 8 bytes
|
||||
|
||||
Free up some space for tracking inflight and losses for each
|
||||
bw sample, in upcoming commits.
|
||||
|
||||
These timestamps are in microseconds, and are now stored in 32
|
||||
bits. So they can only hold time intervals up to roughly 2^12 = 4096
|
||||
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit
|
||||
microsecond implementation approach and resulting deployment
|
||||
limitations. So this is not introducing a new limit. And these should
|
||||
not be a limitation for the foreseeable future.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55
|
||||
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 9 +++++++--
|
||||
net/ipv4/tcp_rate.c | 7 ++++---
|
||||
2 files changed, 11 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -884,6 +884,11 @@ static inline u32 tcp_stamp_us_delta(u64
|
||||
return max_t(s64, t1 - t0, 0);
|
||||
}
|
||||
|
||||
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
|
||||
+{
|
||||
+ return max_t(s32, t1 - t0, 0);
|
||||
+}
|
||||
+
|
||||
/* provide the departure time in us unit */
|
||||
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
|
||||
{
|
||||
@@ -973,9 +978,9 @@ struct tcp_skb_cb {
|
||||
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
|
||||
__u32 delivered;
|
||||
/* start of send pipeline phase */
|
||||
- u64 first_tx_mstamp;
|
||||
+ u32 first_tx_mstamp;
|
||||
/* when we reached the "delivered" count */
|
||||
- u64 delivered_mstamp;
|
||||
+ u32 delivered_mstamp;
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock
|
||||
/* Record send time of most recently ACKed packet: */
|
||||
tp->first_tx_mstamp = tx_tstamp;
|
||||
/* Find the duration of the "send phase" of this window: */
|
||||
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
|
||||
- scb->tx.first_tx_mstamp);
|
||||
+ rs->interval_us = tcp_stamp32_us_delta(
|
||||
+ tp->first_tx_mstamp,
|
||||
+ scb->tx.first_tx_mstamp);
|
||||
|
||||
}
|
||||
/* Mark off the skb delivered once it's sacked to avoid being
|
||||
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
|
||||
* longer phase.
|
||||
*/
|
||||
snd_us = rs->interval_us; /* send phase */
|
||||
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
|
||||
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
|
||||
rs->prior_mstamp); /* ack phase */
|
||||
rs->interval_us = max(snd_us, ack_us);
|
||||
|
109
debian/patches/misc-bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch
vendored
Normal file
109
debian/patches/misc-bbr3/0003-net-tcp_bbr-v2-snapshot-packets-in-flight-at-transmi.patch
vendored
Normal file
@ -0,0 +1,109 @@
|
||||
From 25856231832186fe13189b986cc0e91860c18201 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sat, 5 Aug 2017 11:49:50 -0400
|
||||
Subject: [PATCH 03/19] net-tcp_bbr: v2: snapshot packets in flight at transmit
|
||||
time and pass in rate_sample
|
||||
|
||||
CC algorithms may want to snapshot the number of packets in flight at
|
||||
transmit time and pass in rate_sample, to understand the relationship
|
||||
between inflight and losses or ECN signals, to try to find the highest
|
||||
inflight value that has acceptable levels of loss/ECN marking.
|
||||
|
||||
We split out the code to set an skb's tx.in_flight field into its own
|
||||
function, so that this code can be used for the TCP_REPAIR "fake send"
|
||||
code path that inserts skbs into the rtx queue without sending them.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea
|
||||
Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b
|
||||
Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63
|
||||
Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 6 ++++++
|
||||
net/ipv4/tcp_output.c | 1 +
|
||||
net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++
|
||||
3 files changed, 27 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -981,6 +981,10 @@ struct tcp_skb_cb {
|
||||
u32 first_tx_mstamp;
|
||||
/* when we reached the "delivered" count */
|
||||
u32 delivered_mstamp;
|
||||
+#define TCPCB_IN_FLIGHT_BITS 20
|
||||
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
|
||||
+ u32 in_flight:20, /* packets in flight at transmit */
|
||||
+ unused2:12;
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
@@ -1136,6 +1140,7 @@ struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
+ u32 tx_in_flight; /* packets in flight at starting timestamp */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
@@ -1258,6 +1263,7 @@ static inline void tcp_ca_event(struct s
|
||||
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
|
||||
|
||||
/* From tcp_rate.c */
|
||||
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
|
||||
struct rate_sample *rs);
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -2765,6 +2765,7 @@ static bool tcp_write_xmit(struct sock *
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC);
|
||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||
tcp_init_tso_segs(skb, mss_now);
|
||||
+ tcp_set_tx_in_flight(sk, skb);
|
||||
goto repair; /* Skip network transmission */
|
||||
}
|
||||
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -34,6 +34,24 @@
|
||||
* ready to send in the write queue.
|
||||
*/
|
||||
|
||||
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
|
||||
+{
|
||||
+ struct tcp_sock *tp = tcp_sk(sk);
|
||||
+ u32 in_flight;
|
||||
+
|
||||
+ /* Check, sanitize, and record packets in flight after skb was sent. */
|
||||
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
|
||||
+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
|
||||
+ "insane in_flight %u cc %s mss %u "
|
||||
+ "cwnd %u pif %u %u %u %u\n",
|
||||
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
|
||||
+ tp->mss_cache, tp->snd_cwnd,
|
||||
+ tp->packets_out, tp->retrans_out,
|
||||
+ tp->sacked_out, tp->lost_out))
|
||||
+ in_flight = TCPCB_IN_FLIGHT_MAX;
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
|
||||
+}
|
||||
+
|
||||
/* Snapshot the current delivery information in the skb, to generate
|
||||
* a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
|
||||
*/
|
||||
@@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk,
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
+ tcp_set_tx_in_flight(sk, skb);
|
||||
}
|
||||
|
||||
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
|
||||
@@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
rs->is_app_limited = scb->tx.is_app_limited;
|
||||
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
|
||||
+ rs->tx_in_flight = scb->tx.in_flight;
|
||||
rs->last_end_seq = scb->end_seq;
|
||||
|
||||
/* Record send time of most recently ACKed packet: */
|
70
debian/patches/misc-bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch
vendored
Normal file
70
debian/patches/misc-bbr3/0004-net-tcp_bbr-v2-count-packets-lost-over-TCP-rate-samp.patch
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
From b1772710e8b5b98c09e96d4f1af620cd938fddf7 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Thu, 12 Oct 2017 23:44:27 -0400
|
||||
Subject: [PATCH 04/19] net-tcp_bbr: v2: count packets lost over TCP rate
|
||||
sampling interval
|
||||
|
||||
For understanding the relationship between inflight and packet loss
|
||||
signals, to try to find the highest inflight value that has acceptable
|
||||
levels of packet losses.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab
|
||||
Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 5 ++++-
|
||||
net/ipv4/tcp_rate.c | 3 +++
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -985,6 +985,7 @@ struct tcp_skb_cb {
|
||||
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
|
||||
u32 in_flight:20, /* packets in flight at transmit */
|
||||
unused2:12;
|
||||
+ u32 lost; /* packets lost so far upon tx of skb */
|
||||
} tx; /* only used for outgoing skbs */
|
||||
union {
|
||||
struct inet_skb_parm h4;
|
||||
@@ -1138,11 +1139,13 @@ struct ack_sample {
|
||||
*/
|
||||
struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
u32 tx_in_flight; /* packets in flight at starting timestamp */
|
||||
+ s32 lost; /* number of packets lost over interval */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
u32 snd_interval_us; /* snd interval for delivered packets */
|
||||
u32 rcv_interval_us; /* rcv interval for delivered packets */
|
||||
--- a/net/ipv4/tcp_rate.c
|
||||
+++ b/net/ipv4/tcp_rate.c
|
||||
@@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk,
|
||||
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
tcp_set_tx_in_flight(sk, skb);
|
||||
}
|
||||
@@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock
|
||||
if (!rs->prior_delivered ||
|
||||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
|
||||
scb->end_seq, rs->last_end_seq)) {
|
||||
+ rs->prior_lost = scb->tx.lost;
|
||||
rs->prior_delivered_ce = scb->tx.delivered_ce;
|
||||
rs->prior_delivered = scb->tx.delivered;
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
@@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
|
||||
return;
|
||||
}
|
||||
rs->delivered = tp->delivered - rs->prior_delivered;
|
||||
+ rs->lost = tp->lost - rs->prior_lost;
|
||||
|
||||
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
|
||||
/* delivered_ce occupies less than 32 bits in the skb control block */
|
38
debian/patches/misc-bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch
vendored
Normal file
38
debian/patches/misc-bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
From fdf01142aea8645186e080f1278d3b5a5fd8c66c Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Mon, 19 Nov 2018 13:48:36 -0500
|
||||
Subject: [PATCH 05/19] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
|
||||
|
||||
For understanding the relationship between inflight and ECN signals,
|
||||
to try to find the highest inflight value that has acceptable levels of
|
||||
ECN marking.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
|
||||
Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1157,6 +1157,7 @@ struct rate_sample {
|
||||
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
||||
bool is_retrans; /* is sample from retransmission? */
|
||||
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
||||
+ bool is_ece; /* did this ACK have ECN marked? */
|
||||
};
|
||||
|
||||
struct tcp_congestion_ops {
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -4060,6 +4060,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
delivered = tcp_newly_delivered(sk, delivered, flag);
|
||||
lost = tp->lost - lost; /* freshly marked lost */
|
||||
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
|
||||
+ rs.is_ece = !!(flag & FLAG_ECE);
|
||||
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
|
||||
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
|
||||
tcp_xmit_recovery(sk, rexmit);
|
57
debian/patches/misc-bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch
vendored
Normal file
57
debian/patches/misc-bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
From a3e88432c2ebf12de9c2053a13417ddf2ad4cb4e Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Tue, 7 Aug 2018 21:52:06 -0400
|
||||
Subject: [PATCH 06/19] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
|
||||
module callback API
|
||||
|
||||
For connections experiencing reordering, RACK can mark packets lost
|
||||
long after we receive the SACKs/ACKs hinting that the packets were
|
||||
actually lost.
|
||||
|
||||
This means that CC modules cannot easily learn the volume of inflight
|
||||
data at which packet loss happens by looking at the current inflight
|
||||
or even the packets in flight when the most recently SACKed packet was
|
||||
sent. To learn this, CC modules need to know how many packets were in
|
||||
flight at the time lost packets were sent. This new callback, combined
|
||||
with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
|
||||
|
||||
This also provides a consistent callback that is invoked whether
|
||||
packets are marked lost upon ACK processing, using the RACK reordering
|
||||
timer, or at RTO time.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
|
||||
Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 3 +++
|
||||
net/ipv4/tcp_input.c | 5 +++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1184,6 +1184,9 @@ struct tcp_congestion_ops {
|
||||
/* override sysctl_tcp_min_tso_segs */
|
||||
u32 (*min_tso_segs)(struct sock *sk);
|
||||
|
||||
+ /* react to a specific lost skb (optional) */
|
||||
+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
|
||||
+
|
||||
/* call when packets are delivered to update cwnd and pacing rate,
|
||||
* after all the ca_state processing. (optional)
|
||||
*/
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -1120,7 +1120,12 @@ static void tcp_verify_retransmit_hint(s
|
||||
*/
|
||||
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
|
||||
{
|
||||
+ struct sock *sk = (struct sock *)tp;
|
||||
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||
+
|
||||
tp->lost += tcp_skb_pcount(skb);
|
||||
+ if (ca_ops->skb_marked_lost)
|
||||
+ ca_ops->skb_marked_lost(sk, skb);
|
||||
}
|
||||
|
||||
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
|
59
debian/patches/misc-bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch
vendored
Normal file
59
debian/patches/misc-bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
From af7d33e71649b8e2ae00dccf336720a8ab891606 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Wed, 1 May 2019 20:16:33 -0400
|
||||
Subject: [PATCH 07/19] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
|
||||
tcp_shifted_skb()
|
||||
|
||||
When tcp_shifted_skb() updates state as adjacent SACKed skbs are
|
||||
coalesced, previously the tx.in_flight was not adjusted, so we could
|
||||
get contradictory state where the skb's recorded pcount was bigger
|
||||
than the tx.in_flight (the number of segments that were in_flight
|
||||
after sending the skb).
|
||||
|
||||
Normally having a SACKed skb with contradictory pcount/tx.in_flight
|
||||
would not matter. However, with SACK reneging, the SACKed bit is
|
||||
removed, and an skb once again becomes eligible for retransmitting,
|
||||
fragmenting, SACKing, etc. Packetdrill testing verified the following
|
||||
sequence is possible in a kernel that does not have this commit:
|
||||
|
||||
- skb N is SACKed
|
||||
- skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
|
||||
- tcp_shifted_skb() will increase the pcount of prev,
|
||||
but leave tx.in_flight as-is
|
||||
- so prev skb can have pcount > tx.in_flight
|
||||
- RTO, tcp_timeout_mark_lost(), detect reneg,
|
||||
remove "SACKed" bit, mark skb N as lost
|
||||
- find pcount of skb N is greater than its tx.in_flight
|
||||
|
||||
I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
|
||||
WARN_ON_ONCE(inflight_prev < 0)
|
||||
to fire in production machines using bbr2.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
|
||||
Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/tcp_input.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -1506,6 +1506,17 @@ static bool tcp_shifted_skb(struct sock
|
||||
WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
|
||||
tcp_skb_pcount_add(skb, -pcount);
|
||||
|
||||
+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
|
||||
+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
|
||||
+ "prev in_flight: %u skb in_flight: %u pcount: %u",
|
||||
+ TCP_SKB_CB(prev)->tx.in_flight,
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight,
|
||||
+ pcount))
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = 0;
|
||||
+ else
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
|
||||
+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
|
||||
+
|
||||
/* When we're adding to gso_segs == 1, gso_size will be zero,
|
||||
* in theory this shouldn't be necessary but as long as DSACK
|
||||
* code can come after this skb later on it's better to keep
|
97
debian/patches/misc-bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch
vendored
Normal file
97
debian/patches/misc-bbr3/0008-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-split-in.patch
vendored
Normal file
@ -0,0 +1,97 @@
|
||||
From a4d44bce49f61f8755f558dc40edff5f8958b7c6 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Wed, 1 May 2019 20:16:25 -0400
|
||||
Subject: [PATCH 08/19] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in
|
||||
tcp_fragment()
|
||||
|
||||
When we fragment an skb that has already been sent, we need to update
|
||||
the tx.in_flight for the first skb in the resulting pair ("buff").
|
||||
|
||||
Because we were not updating the tx.in_flight, the tx.in_flight value
|
||||
was inconsistent with the pcount of the "buff" skb (tx.in_flight would
|
||||
be too high). That meant that if the "buff" skb was lost, then
|
||||
bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value
|
||||
that is too high. This could result in longer queues and higher packet
|
||||
loss.
|
||||
|
||||
Packetdrill testing verified that without this commit, when the second
|
||||
half of an skb is SACKed and then later the first half of that skb is
|
||||
marked lost, the calculated inflight_hi was incorrect.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874
|
||||
Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup]
|
||||
Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings
|
||||
Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 15 +++++++++++++++
|
||||
net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++-
|
||||
2 files changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1283,6 +1283,21 @@ static inline bool tcp_skb_sent_after(u6
|
||||
return t1 > t2 || (t1 == t2 && after(seq1, seq2));
|
||||
}
|
||||
|
||||
+/* If a retransmit failed due to local qdisc congestion or other local issues,
|
||||
+ * then we may have called tcp_set_skb_tso_segs() to increase the number of
|
||||
+ * segments in the skb without increasing the tx.in_flight. In all other cases,
|
||||
+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We
|
||||
+ * do not have the state to know whether a retransmit failed due to local qdisc
|
||||
+ * congestion or other local issues, so to avoid spurious warnings we consider
|
||||
+ * that any skb marked lost may have suffered that fate.
|
||||
+ */
|
||||
+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount,
|
||||
+ u32 skb_sacked_flags,
|
||||
+ u32 tx_in_flight)
|
||||
+{
|
||||
+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST);
|
||||
+}
|
||||
+
|
||||
/* These functions determine how the current flow behaves in respect of SACK
|
||||
* handling. SACK is negotiated with the peer, and therefore it can vary
|
||||
* between different flows.
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -1601,7 +1601,7 @@ int tcp_fragment(struct sock *sk, enum t
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct sk_buff *buff;
|
||||
- int old_factor;
|
||||
+ int old_factor, inflight_prev;
|
||||
long limit;
|
||||
int nlen;
|
||||
u8 flags;
|
||||
@@ -1676,6 +1676,30 @@ int tcp_fragment(struct sock *sk, enum t
|
||||
|
||||
if (diff)
|
||||
tcp_adjust_pcount(sk, skb, diff);
|
||||
+
|
||||
+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
|
||||
+ if (inflight_prev < 0) {
|
||||
+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
|
||||
+ old_factor,
|
||||
+ TCP_SKB_CB(skb)->sacked,
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight),
|
||||
+ "inconsistent: tx.in_flight: %u "
|
||||
+ "old_factor: %d mss: %u sacked: %u "
|
||||
+ "1st pcount: %d 2nd pcount: %d "
|
||||
+ "1st len: %u 2nd len: %u ",
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight, old_factor,
|
||||
+ mss_now, TCP_SKB_CB(skb)->sacked,
|
||||
+ tcp_skb_pcount(skb), tcp_skb_pcount(buff),
|
||||
+ skb->len, buff->len);
|
||||
+ inflight_prev = 0;
|
||||
+ }
|
||||
+ /* Set 1st tx.in_flight as if 1st were sent by itself: */
|
||||
+ TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
|
||||
+ tcp_skb_pcount(skb);
|
||||
+ /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
|
||||
+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
|
||||
+ tcp_skb_pcount(skb) +
|
||||
+ tcp_skb_pcount(buff);
|
||||
}
|
||||
|
||||
/* Link BUFF into the send queue. */
|
73
debian/patches/misc-bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch
vendored
Normal file
73
debian/patches/misc-bbr3/0009-net-tcp-add-new-ca-opts-flag-TCP_CONG_WANTS_CE_EVENT.patch
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
From 65cca0e8fd954a150ec874650af47f7800ea3049 Mon Sep 17 00:00:00 2001
|
||||
From: Yousuk Seung <ysseung@google.com>
|
||||
Date: Wed, 23 May 2018 17:55:54 -0700
|
||||
Subject: [PATCH 09/19] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
|
||||
|
||||
Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
|
||||
congestion control module to receive CE events.
|
||||
|
||||
Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
|
||||
bit in opts flag to receive CE events but this may incur changes in ECN
|
||||
behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
|
||||
that allows congestion control modules to receive CE events
|
||||
independently of TCP_CONG_NEEDS_ECN.
|
||||
|
||||
Effort: net-tcp
|
||||
Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
|
||||
Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 14 +++++++++++++-
|
||||
net/ipv4/tcp_input.c | 4 ++--
|
||||
2 files changed, 15 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1119,7 +1119,11 @@ enum tcp_ca_ack_event_flags {
|
||||
#define TCP_CONG_NON_RESTRICTED 0x1
|
||||
/* Requires ECN/ECT set on all packets */
|
||||
#define TCP_CONG_NEEDS_ECN 0x2
|
||||
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
|
||||
+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
|
||||
+#define TCP_CONG_WANTS_CE_EVENTS 0x4
|
||||
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
|
||||
+ TCP_CONG_NEEDS_ECN | \
|
||||
+ TCP_CONG_WANTS_CE_EVENTS)
|
||||
|
||||
union tcp_cc_info;
|
||||
|
||||
@@ -1251,6 +1255,14 @@ static inline char *tcp_ca_get_name_by_k
|
||||
}
|
||||
#endif
|
||||
|
||||
+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
|
||||
+{
|
||||
+ const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
+
|
||||
+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
|
||||
+ TCP_CONG_WANTS_CE_EVENTS);
|
||||
+}
|
||||
+
|
||||
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
|
||||
{
|
||||
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -370,7 +370,7 @@ static void __tcp_ecn_check_ce(struct so
|
||||
tcp_enter_quickack_mode(sk, 2);
|
||||
break;
|
||||
case INET_ECN_CE:
|
||||
- if (tcp_ca_needs_ecn(sk))
|
||||
+ if (tcp_ca_wants_ce_events(sk))
|
||||
tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
|
||||
|
||||
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
|
||||
@@ -381,7 +381,7 @@ static void __tcp_ecn_check_ce(struct so
|
||||
tp->ecn_flags |= TCP_ECN_SEEN;
|
||||
break;
|
||||
default:
|
||||
- if (tcp_ca_needs_ecn(sk))
|
||||
+ if (tcp_ca_wants_ce_events(sk))
|
||||
tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
|
||||
tp->ecn_flags |= TCP_ECN_SEEN;
|
||||
break;
|
118
debian/patches/misc-bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch
vendored
Normal file
118
debian/patches/misc-bbr3/0010-net-tcp-re-generalize-TSO-sizing-in-TCP-CC-module-AP.patch
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
From 3acb852e1cfcdeea388bd428c6dd81609fd40792 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Fri, 27 Sep 2019 17:10:26 -0400
|
||||
Subject: [PATCH 10/19] net-tcp: re-generalize TSO sizing in TCP CC module API
|
||||
|
||||
Reorganize the API for CC modules so that the CC module once again
|
||||
gets complete control of the TSO sizing decision. This is how the API
|
||||
was set up around 2016 and the initial BBRv1 upstreaming. Later Eric
|
||||
Dumazet simplified it. But with wider testing it now seems that to
|
||||
avoid CPU regressions BBR needs to have a different TSO sizing
|
||||
function.
|
||||
|
||||
This is necessary to handle cases where there are many flows
|
||||
bottlenecked on the sender host's NIC, in which case BBR's pacing rate
|
||||
is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because
|
||||
BBR's pacing rate adapts to the low bandwidth share each flow sees. By
|
||||
contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very
|
||||
large cwnd, and thus large pacing rate and large TSO burst size.
|
||||
|
||||
Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 4 ++--
|
||||
net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++-----------
|
||||
net/ipv4/tcp_output.c | 11 +++++------
|
||||
3 files changed, 33 insertions(+), 19 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1185,8 +1185,8 @@ struct tcp_congestion_ops {
|
||||
/* hook for packet ack accounting (optional) */
|
||||
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
|
||||
|
||||
- /* override sysctl_tcp_min_tso_segs */
|
||||
- u32 (*min_tso_segs)(struct sock *sk);
|
||||
+ /* pick target number of segments per TSO/GSO skb (optional): */
|
||||
+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
|
||||
|
||||
/* react to a specific lost skb (optional) */
|
||||
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
|
||||
--- a/net/ipv4/tcp_bbr.c
|
||||
+++ b/net/ipv4/tcp_bbr.c
|
||||
@@ -301,20 +301,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs(
|
||||
return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
|
||||
}
|
||||
|
||||
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
|
||||
+ * a particular max gso size as a constraint.
|
||||
+ */
|
||||
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
|
||||
+ u32 gso_max_size)
|
||||
+{
|
||||
+ u32 segs;
|
||||
+ u64 bytes;
|
||||
+
|
||||
+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
|
||||
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
|
||||
+
|
||||
+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
|
||||
+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
|
||||
+ return segs;
|
||||
+}
|
||||
+
|
||||
+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
|
||||
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
+{
|
||||
+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
|
||||
+}
|
||||
+
|
||||
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
|
||||
static u32 bbr_tso_segs_goal(struct sock *sk)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
- u32 segs, bytes;
|
||||
-
|
||||
- /* Sort of tcp_tso_autosize() but ignoring
|
||||
- * driver provided sk_gso_max_size.
|
||||
- */
|
||||
- bytes = min_t(unsigned long,
|
||||
- READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
|
||||
- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
|
||||
- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
|
||||
|
||||
- return min(segs, 0x7FU);
|
||||
+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE);
|
||||
}
|
||||
|
||||
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
|
||||
@@ -1150,7 +1165,7 @@ static struct tcp_congestion_ops tcp_bbr
|
||||
.undo_cwnd = bbr_undo_cwnd,
|
||||
.cwnd_event = bbr_cwnd_event,
|
||||
.ssthresh = bbr_ssthresh,
|
||||
- .min_tso_segs = bbr_min_tso_segs,
|
||||
+ .tso_segs = bbr_tso_segs,
|
||||
.get_info = bbr_get_info,
|
||||
.set_state = bbr_set_state,
|
||||
};
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -2057,13 +2057,12 @@ static u32 tcp_tso_autosize(const struct
|
||||
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
{
|
||||
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
|
||||
- u32 min_tso, tso_segs;
|
||||
+ u32 tso_segs;
|
||||
|
||||
- min_tso = ca_ops->min_tso_segs ?
|
||||
- ca_ops->min_tso_segs(sk) :
|
||||
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
-
|
||||
- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
|
||||
+ tso_segs = ca_ops->tso_segs ?
|
||||
+ ca_ops->tso_segs(sk, mss_now) :
|
||||
+ tcp_tso_autosize(sk, mss_now,
|
||||
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
|
||||
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
|
||||
}
|
||||
|
72
debian/patches/misc-bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch
vendored
Normal file
72
debian/patches/misc-bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
From 3741ada76bab5111cbb9c279cf27e67a0334eb05 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 7 Jan 2024 21:11:26 -0300
|
||||
Subject: [PATCH 11/19] net-tcp: add fast_ack_mode=1: skip rwin check in
|
||||
tcp_fast_ack_mode__tcp_ack_snd_check()
|
||||
|
||||
Add logic for an experimental TCP connection behavior, enabled with
|
||||
tp->fast_ack_mode = 1, which disables checking the receive window
|
||||
before sending an ack in __tcp_ack_snd_check(). If this behavior is
|
||||
enabled, the data receiver sends an ACK if the amount of data is >
|
||||
RCV.MSS.
|
||||
|
||||
Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/tcp.h | 3 ++-
|
||||
net/ipv4/tcp.c | 1 +
|
||||
net/ipv4/tcp_cong.c | 1 +
|
||||
net/ipv4/tcp_input.c | 5 +++--
|
||||
4 files changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/tcp.h
|
||||
+++ b/include/linux/tcp.h
|
||||
@@ -369,7 +369,8 @@ struct tcp_sock {
|
||||
u8 compressed_ack;
|
||||
u8 dup_ack_counter:2,
|
||||
tlp_retrans:1, /* TLP is a retransmission */
|
||||
- unused:5;
|
||||
+ fast_ack_mode:2, /* which fast ack mode ? */
|
||||
+ unused:3;
|
||||
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
|
||||
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
|
||||
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -3123,6 +3123,7 @@ int tcp_disconnect(struct sock *sk, int
|
||||
tp->rx_opt.dsack = 0;
|
||||
tp->rx_opt.num_sacks = 0;
|
||||
tp->rcv_ooopack = 0;
|
||||
+ tp->fast_ack_mode = 0;
|
||||
|
||||
|
||||
/* Clean up fastopen related fields */
|
||||
--- a/net/ipv4/tcp_cong.c
|
||||
+++ b/net/ipv4/tcp_cong.c
|
||||
@@ -237,6 +237,7 @@ void tcp_init_congestion_control(struct
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
tcp_sk(sk)->prior_ssthresh = 0;
|
||||
+ tcp_sk(sk)->fast_ack_mode = 0;
|
||||
if (icsk->icsk_ca_ops->init)
|
||||
icsk->icsk_ca_ops->init(sk);
|
||||
if (tcp_ca_needs_ecn(sk))
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -5763,13 +5763,14 @@ static void __tcp_ack_snd_check(struct s
|
||||
|
||||
/* More than one full frame received... */
|
||||
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
|
||||
+ (tp->fast_ack_mode == 1 ||
|
||||
/* ... and right edge of window advances far enough.
|
||||
* (tcp_recvmsg() will send ACK otherwise).
|
||||
* If application uses SO_RCVLOWAT, we want send ack now if
|
||||
* we have not received enough bytes to satisfy the condition.
|
||||
*/
|
||||
- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
|
||||
- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
|
||||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
|
||||
+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
|
||||
/* We ACK each frame or... */
|
||||
tcp_in_quickack_mode(sk) ||
|
||||
/* Protocol state mandates a one-time immediate ACK */
|
45
debian/patches/misc-bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch
vendored
Normal file
45
debian/patches/misc-bbr3/0012-net-tcp_bbr-v2-record-app-limited-status-of-TLP-repa.patch
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
From e5d35b7c882b7001f8a31b14c9f08917230dedc3 Mon Sep 17 00:00:00 2001
|
||||
From: Jianfeng Wang <jfwang@google.com>
|
||||
Date: Fri, 19 Jun 2020 17:33:45 +0000
|
||||
Subject: [PATCH 12/19] net-tcp_bbr: v2: record app-limited status of
|
||||
TLP-repaired flight
|
||||
|
||||
When sending a TLP retransmit, record whether the outstanding flight
|
||||
of data is application limited. This is important for congestion
|
||||
control modules that want to respond to losses repaired by TLP
|
||||
retransmits. This is important because the following scenarios convey
|
||||
very different information:
|
||||
(1) a packet loss with a small number of packets in flight;
|
||||
(2) a packet loss with the maximum amount of data in flight allowed
|
||||
by the CC module.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/tcp.h | 3 ++-
|
||||
net/ipv4/tcp_output.c | 1 +
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/tcp.h
|
||||
+++ b/include/linux/tcp.h
|
||||
@@ -370,7 +370,8 @@ struct tcp_sock {
|
||||
u8 dup_ack_counter:2,
|
||||
tlp_retrans:1, /* TLP is a retransmission */
|
||||
fast_ack_mode:2, /* which fast ack mode ? */
|
||||
- unused:3;
|
||||
+ tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */
|
||||
+ unused:2;
|
||||
u8 thin_lto : 1,/* Use linear timeouts for thin streams */
|
||||
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
|
||||
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -3003,6 +3003,7 @@ void tcp_send_loss_probe(struct sock *sk
|
||||
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
|
||||
goto rearm_timer;
|
||||
|
||||
+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited;
|
||||
if (__tcp_retransmit_skb(sk, skb, 1))
|
||||
goto rearm_timer;
|
||||
|
45
debian/patches/misc-bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch
vendored
Normal file
45
debian/patches/misc-bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
From 77e7c22b63f8934206b1e89c173558c3967f0779 Mon Sep 17 00:00:00 2001
|
||||
From: Jianfeng Wang <jfwang@google.com>
|
||||
Date: Tue, 16 Jun 2020 17:41:19 +0000
|
||||
Subject: [PATCH 13/19] net-tcp_bbr: v2: inform CC module of losses repaired by
|
||||
TLP probe
|
||||
|
||||
Before this commit, when there is a packet loss that creates a sequence
|
||||
hole that is filled by a TLP loss probe, then tcp_process_tlp_ack()
|
||||
only informs the congestion control (CC) module via a back-to-back entry
|
||||
and exit of CWR. But some congestion control modules (e.g. BBR) do not
|
||||
respond to CWR events.
|
||||
|
||||
This commit adds a new CA event with which the core TCP stack notifies
|
||||
the CC module when a loss is repaired by a TLP. This will allow CC
|
||||
modules that do not use the CWR mechanism to have a custom handler for
|
||||
such TLP recoveries.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1097,6 +1097,7 @@ enum tcp_ca_event {
|
||||
CA_EVENT_LOSS, /* loss timeout */
|
||||
CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */
|
||||
CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */
|
||||
+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */
|
||||
};
|
||||
|
||||
/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3859,6 +3859,7 @@ static void tcp_process_tlp_ack(struct s
|
||||
/* ACK advances: there was a loss, so reduce cwnd. Reset
|
||||
* tlp_high_seq in tcp_init_cwnd_reduction()
|
||||
*/
|
||||
+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY);
|
||||
tcp_init_cwnd_reduction(sk);
|
||||
tcp_set_ca_state(sk, TCP_CA_CWR);
|
||||
tcp_end_cwnd_reduction(sk);
|
73
debian/patches/misc-bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch
vendored
Normal file
73
debian/patches/misc-bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
From cab22a8e2e87870e8334a12ffcd0ba04ea81126f Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Mon, 21 Sep 2020 14:46:26 -0400
|
||||
Subject: [PATCH 14/19] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq
|
||||
into rate_sample
|
||||
|
||||
Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will
|
||||
export to the CC module the knowledge of whether the current ACK
|
||||
matched a TLP retransmit.
|
||||
|
||||
Note that when this bool is true, we cannot yet tell (in general) whether
|
||||
this ACK is for the original or the TLP retransmit.
|
||||
|
||||
Effort: net-tcp_bbr
|
||||
Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_input.c | 12 +++++++++---
|
||||
2 files changed, 10 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -1161,6 +1161,7 @@ struct rate_sample {
|
||||
u32 last_end_seq; /* end_seq of most recently ACKed packet */
|
||||
bool is_app_limited; /* is sample from packet with bubble in pipe? */
|
||||
bool is_retrans; /* is sample from retransmission? */
|
||||
+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */
|
||||
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
|
||||
bool is_ece; /* did this ACK have ECN marked? */
|
||||
};
|
||||
--- a/net/ipv4/tcp_input.c
|
||||
+++ b/net/ipv4/tcp_input.c
|
||||
@@ -3842,7 +3842,8 @@ static void tcp_replace_ts_recent(struct
|
||||
/* This routine deals with acks during a TLP episode and ends an episode by
|
||||
* resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
|
||||
*/
|
||||
-static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
|
||||
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag,
|
||||
+ struct rate_sample *rs)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
@@ -3870,6 +3871,11 @@ static void tcp_process_tlp_ack(struct s
|
||||
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
|
||||
/* Pure dupack: original and TLP probe arrived; no loss */
|
||||
tp->tlp_high_seq = 0;
|
||||
+ } else {
|
||||
+ /* This ACK matches a TLP retransmit. We cannot yet tell if
|
||||
+ * this ACK is for the original or the TLP retransmit.
|
||||
+ */
|
||||
+ rs->is_acking_tlp_retrans_seq = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4053,7 +4059,7 @@ static int tcp_ack(struct sock *sk, cons
|
||||
tcp_rack_update_reo_wnd(sk, &rs);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
- tcp_process_tlp_ack(sk, ack, flag);
|
||||
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
|
||||
|
||||
if (tcp_ack_is_dubious(sk, flag)) {
|
||||
if (!(flag & (FLAG_SND_UNA_ADVANCED |
|
||||
@@ -4097,7 +4103,7 @@ no_queue:
|
||||
tcp_ack_probe(sk);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
- tcp_process_tlp_ack(sk, ack, flag);
|
||||
+ tcp_process_tlp_ack(sk, ack, flag, &rs);
|
||||
return 1;
|
||||
|
||||
old_ack:
|
112
debian/patches/misc-bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch
vendored
Normal file
112
debian/patches/misc-bbr3/0015-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch
vendored
Normal file
@ -0,0 +1,112 @@
|
||||
From 38dd25482f815d949fec91edd7694b2f15823f67 Mon Sep 17 00:00:00 2001
|
||||
From: David Morley <morleyd@google.com>
|
||||
Date: Fri, 14 Jul 2023 11:07:56 -0400
|
||||
Subject: [PATCH 15/19] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
|
||||
|
||||
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
|
||||
|
||||
This feature indicates that the given destination network is a
|
||||
low-latency ECN environment, meaning both that ECN CE marks are
|
||||
applied by the network using a low-latency marking threshold and also
|
||||
that TCP endpoints provide precise per-data-segment ECN feedback in
|
||||
ACKs (where the ACK ECE flag echoes the received CE status of all
|
||||
newly-acknowledged data segments). This feature indication can be used
|
||||
by congestion control algorithms to decide how to interpret ECN
|
||||
signals over the given destination network.
|
||||
|
||||
This feature is appropriate for datacenter-style ECN marking, such as
|
||||
the ECN marking approach expected by DCTCP or BBR congestion control
|
||||
modules.
|
||||
|
||||
Signed-off-by: David Morley <morleyd@google.com>
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Signed-off-by: Yuchung Cheng <ycheng@google.com>
|
||||
Tested-by: David Morley <morleyd@google.com>
|
||||
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 10 ++++++++++
|
||||
include/uapi/linux/rtnetlink.h | 4 +++-
|
||||
net/ipv4/tcp_minisocks.c | 2 ++
|
||||
net/ipv4/tcp_output.c | 6 ++++--
|
||||
4 files changed, 19 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode
|
||||
#define TCP_ECN_QUEUE_CWR 2
|
||||
#define TCP_ECN_DEMAND_CWR 4
|
||||
#define TCP_ECN_SEEN 8
|
||||
+#define TCP_ECN_LOW 16
|
||||
|
||||
enum tcp_tw_status {
|
||||
TCP_TW_SUCCESS = 0,
|
||||
@@ -777,6 +778,15 @@ static inline void tcp_fast_path_check(s
|
||||
tcp_fast_path_on(tp);
|
||||
}
|
||||
|
||||
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
|
||||
+ const struct dst_entry *dst)
|
||||
+{
|
||||
+ struct tcp_sock *tp = tcp_sk(sk);
|
||||
+
|
||||
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
|
||||
+ tp->ecn_flags |= TCP_ECN_LOW;
|
||||
+}
|
||||
+
|
||||
u32 tcp_delack_max(const struct sock *sk);
|
||||
|
||||
/* Compute the actual rto_min value */
|
||||
--- a/include/uapi/linux/rtnetlink.h
|
||||
+++ b/include/uapi/linux/rtnetlink.h
|
||||
@@ -507,12 +507,14 @@ enum {
|
||||
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
|
||||
#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
|
||||
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
|
||||
+#define RTAX_FEATURE_ECN_LOW (1 << 5)
|
||||
|
||||
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
|
||||
RTAX_FEATURE_SACK | \
|
||||
RTAX_FEATURE_TIMESTAMP | \
|
||||
RTAX_FEATURE_ALLFRAG | \
|
||||
- RTAX_FEATURE_TCP_USEC_TS)
|
||||
+ RTAX_FEATURE_TCP_USEC_TS | \
|
||||
+ RTAX_FEATURE_ECN_LOW)
|
||||
|
||||
struct rta_session {
|
||||
__u8 proto;
|
||||
--- a/net/ipv4/tcp_minisocks.c
|
||||
+++ b/net/ipv4/tcp_minisocks.c
|
||||
@@ -459,6 +459,8 @@ void tcp_ca_openreq_child(struct sock *s
|
||||
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
|
||||
bool ca_got_dst = false;
|
||||
|
||||
+ tcp_set_ecn_low_from_dst(sk, dst);
|
||||
+
|
||||
if (ca_key != TCP_CA_UNSPEC) {
|
||||
const struct tcp_congestion_ops *ca;
|
||||
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -336,10 +336,9 @@ static void tcp_ecn_send_syn(struct sock
|
||||
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
|
||||
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
|
||||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
|
||||
+ const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
|
||||
if (!use_ecn) {
|
||||
- const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
-
|
||||
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
|
||||
use_ecn = true;
|
||||
}
|
||||
@@ -351,6 +350,9 @@ static void tcp_ecn_send_syn(struct sock
|
||||
tp->ecn_flags = TCP_ECN_OK;
|
||||
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
||||
INET_ECN_xmit(sk);
|
||||
+
|
||||
+ if (dst)
|
||||
+ tcp_set_ecn_low_from_dst(sk, dst);
|
||||
}
|
||||
}
|
||||
|
2821
debian/patches/misc-bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch
vendored
Normal file
2821
debian/patches/misc-bbr3/0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch
vendored
Normal file
File diff suppressed because it is too large
Load Diff
59
debian/patches/misc-bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch
vendored
Normal file
59
debian/patches/misc-bbr3/0017-net-tcp_bbr-v3-ensure-ECN-enabled-BBR-flows-set-ECT-.patch
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
From 99e86f904f246ae9ec7a13d1b920eaf4a8c2d47b Mon Sep 17 00:00:00 2001
|
||||
From: Adithya Abraham Philip <abrahamphilip@google.com>
|
||||
Date: Fri, 11 Jun 2021 21:56:10 +0000
|
||||
Subject: [PATCH 17/19] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT
|
||||
on retransmits
|
||||
|
||||
Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to
|
||||
indicate that retransmitted packets and pure ACKs must have the
|
||||
ECT bit set. This is necessary for BBR, which when using
|
||||
ECN expects ECT to be set even on retransmitted packets and ACKs.
|
||||
|
||||
Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use
|
||||
ECN but don't "need" it did not have a way to indicate that ECT should
|
||||
be set on retransmissions/ACKs.
|
||||
|
||||
Signed-off-by: Adithya Abraham Philip <abrahamphilip@google.com>
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/net/tcp.h | 1 +
|
||||
net/ipv4/tcp_bbr.c | 3 +++
|
||||
net/ipv4/tcp_output.c | 3 ++-
|
||||
3 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/net/tcp.h
|
||||
+++ b/include/net/tcp.h
|
||||
@@ -376,6 +376,7 @@ static inline void tcp_dec_quickack_mode
|
||||
#define TCP_ECN_DEMAND_CWR 4
|
||||
#define TCP_ECN_SEEN 8
|
||||
#define TCP_ECN_LOW 16
|
||||
+#define TCP_ECN_ECT_PERMANENT 32
|
||||
|
||||
enum tcp_tw_status {
|
||||
TCP_TW_SUCCESS = 0,
|
||||
--- a/net/ipv4/tcp_bbr.c
|
||||
+++ b/net/ipv4/tcp_bbr.c
|
||||
@@ -2151,6 +2151,9 @@ __bpf_kfunc static void bbr_init(struct
|
||||
bbr->plb.pause_until = 0;
|
||||
|
||||
tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0;
|
||||
+
|
||||
+ if (bbr_can_use_ecn(sk))
|
||||
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
|
||||
}
|
||||
|
||||
/* BBR marks the current round trip as a loss round. */
|
||||
--- a/net/ipv4/tcp_output.c
|
||||
+++ b/net/ipv4/tcp_output.c
|
||||
@@ -390,7 +390,8 @@ static void tcp_ecn_send(struct sock *sk
|
||||
th->cwr = 1;
|
||||
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
|
||||
}
|
||||
- } else if (!tcp_ca_needs_ecn(sk)) {
|
||||
+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
|
||||
+ !tcp_ca_needs_ecn(sk)) {
|
||||
/* ACK or retransmitted segment: clear ECT|CE */
|
||||
INET_ECN_dontxmit(sk);
|
||||
}
|
38
debian/patches/misc-bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch
vendored
Normal file
38
debian/patches/misc-bbr3/0018-tcp-export-TCPI_OPT_ECN_LOW-in-tcp_info-tcpi_options.patch
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
From 5d7cb61552d374bcaaa95022129b4ca1eace1c33 Mon Sep 17 00:00:00 2001
|
||||
From: Neal Cardwell <ncardwell@google.com>
|
||||
Date: Sun, 23 Jul 2023 23:25:34 -0400
|
||||
Subject: [PATCH 18/19] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options
|
||||
field
|
||||
|
||||
Analogous to other important ECN information, export TCPI_OPT_ECN_LOW
|
||||
in tcp_info tcpi_options field.
|
||||
|
||||
Signed-off-by: Neal Cardwell <ncardwell@google.com>
|
||||
Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/uapi/linux/tcp.h | 1 +
|
||||
net/ipv4/tcp.c | 2 ++
|
||||
2 files changed, 3 insertions(+)
|
||||
|
||||
--- a/include/uapi/linux/tcp.h
|
||||
+++ b/include/uapi/linux/tcp.h
|
||||
@@ -178,6 +178,7 @@ enum tcp_fastopen_client_fail {
|
||||
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
|
||||
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
|
||||
#define TCPI_OPT_USEC_TS 64 /* usec timestamps */
|
||||
+#define TCPI_OPT_ECN_LOW 128 /* Low-latency ECN configured at init */
|
||||
|
||||
/*
|
||||
* Sender's congestion state indicating normal or abnormal situations
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -3850,6 +3850,8 @@ void tcp_get_info(struct sock *sk, struc
|
||||
info->tcpi_options |= TCPI_OPT_ECN;
|
||||
if (tp->ecn_flags & TCP_ECN_SEEN)
|
||||
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
|
||||
+ if (tp->ecn_flags & TCP_ECN_LOW)
|
||||
+ info->tcpi_options |= TCPI_OPT_ECN_LOW;
|
||||
if (tp->syn_data_acked)
|
||||
info->tcpi_options |= TCPI_OPT_SYN_DATA;
|
||||
if (tp->tcp_usec_ts)
|
42
debian/patches/misc-bbr3/0019-x86-cfi-bpf-Add-tso_segs-and-skb_marked_lost-to-bpf_.patch
vendored
Normal file
42
debian/patches/misc-bbr3/0019-x86-cfi-bpf-Add-tso_segs-and-skb_marked_lost-to-bpf_.patch
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
From 39838c2f0b09bec02004c092904aada85da2bc2e Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 11 Mar 2024 12:01:13 -0300
|
||||
Subject: [PATCH 19/19] x86/cfi,bpf: Add tso_segs and skb_marked_lost to
|
||||
bpf_struct_ops CFI
|
||||
|
||||
Rebased-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
[ https://github.com/sirlucjan/kernel-patches/blob/master/6.8/bbr3-patches/0001-tcp-bbr3-initial-import.patch ]
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/ipv4/bpf_tcp_ca.c | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/net/ipv4/bpf_tcp_ca.c
|
||||
+++ b/net/ipv4/bpf_tcp_ca.c
|
||||
@@ -305,11 +305,15 @@ static void bpf_tcp_ca_pkts_acked(struct
|
||||
{
|
||||
}
|
||||
|
||||
-static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
|
||||
+static u32 bpf_tcp_ca_tso_segs(struct sock *sk, unsigned int mss_now)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void bpf_tcp_ca_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
|
||||
const struct rate_sample *rs)
|
||||
{
|
||||
@@ -340,7 +344,8 @@ static struct tcp_congestion_ops __bpf_o
|
||||
.cwnd_event = bpf_tcp_ca_cwnd_event,
|
||||
.in_ack_event = bpf_tcp_ca_in_ack_event,
|
||||
.pkts_acked = bpf_tcp_ca_pkts_acked,
|
||||
- .min_tso_segs = bpf_tcp_ca_min_tso_segs,
|
||||
+ .tso_segs = bpf_tcp_ca_tso_segs,
|
||||
+ .skb_marked_lost = bpf_tcp_ca_skb_marked_lost,
|
||||
.cong_control = bpf_tcp_ca_cong_control,
|
||||
.undo_cwnd = bpf_tcp_ca_undo_cwnd,
|
||||
.sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
|
375
debian/patches/misc-ntsync5/0001-ntsync-Introduce-NTSYNC_IOC_WAIT_ANY.patch
vendored
Normal file
375
debian/patches/misc-ntsync5/0001-ntsync-Introduce-NTSYNC_IOC_WAIT_ANY.patch
vendored
Normal file
@ -0,0 +1,375 @@
|
||||
From 60b01019526236e40466cf20bf1192074e5e1a7c Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:27 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_WAIT_ANY.
|
||||
|
||||
This corresponds to part of the functionality of the NT syscall
|
||||
NtWaitForMultipleObjects(). Specifically, it implements the behaviour where
|
||||
the third argument (wait_any) is TRUE, and it does not handle alertable waits.
|
||||
Those features have been split out into separate patches to ease review.
|
||||
|
||||
This patch therefore implements the wait/wake infrastructure which comprises the
|
||||
core of ntsync's functionality.
|
||||
|
||||
NTSYNC_IOC_WAIT_ANY is a vectored wait function similar to poll(). Unlike
|
||||
poll(), it "consumes" objects when they are signaled. For semaphores, this means
|
||||
decrementing the internal counter by one. At most one object can be consumed by
|
||||
this function.
|
||||
|
||||
This wait/wake model is fundamentally different from that used anywhere else in
|
||||
the kernel, and for that reason ntsync does not use any existing infrastructure,
|
||||
such as futexes, kernel mutexes or semaphores, or wait_event().
|
||||
|
||||
Up to 64 objects can be waited on at once. As soon as one is signaled, the
|
||||
object with the lowest index is consumed, and that index is returned via the
|
||||
"index" field.
|
||||
|
||||
A timeout is supported. The timeout is passed as a u64 nanosecond value, which
|
||||
represents absolute time measured against either the MONOTONIC or REALTIME clock
|
||||
(controlled by the flags argument). If U64_MAX is passed, the ioctl waits
|
||||
indefinitely.
|
||||
|
||||
This ioctl validates that all objects belong to the relevant device. This is not
|
||||
necessary for any technical reason related to NTSYNC_IOC_WAIT_ANY, but will be
|
||||
necessary for NTSYNC_IOC_WAIT_ALL introduced in the following patch.
|
||||
|
||||
Some padding fields are added for alignment and for fields which will be added
|
||||
in future patches (split out to ease review).
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 245 ++++++++++++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 14 +++
|
||||
2 files changed, 259 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -6,11 +6,16 @@
|
||||
*/
|
||||
|
||||
#include <linux/anon_inodes.h>
|
||||
+#include <linux/atomic.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
+#include <linux/hrtimer.h>
|
||||
+#include <linux/ktime.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/overflow.h>
|
||||
+#include <linux/sched.h>
|
||||
+#include <linux/sched/signal.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <uapi/linux/ntsync.h>
|
||||
@@ -30,6 +35,8 @@ enum ntsync_type {
|
||||
*
|
||||
* Both rely on struct file for reference counting. Individual
|
||||
* ntsync_obj objects take a reference to the device when created.
|
||||
+ * Wait operations take a reference to each object being waited on for
|
||||
+ * the duration of the wait.
|
||||
*/
|
||||
|
||||
struct ntsync_obj {
|
||||
@@ -47,12 +54,55 @@ struct ntsync_obj {
|
||||
__u32 max;
|
||||
} sem;
|
||||
} u;
|
||||
+
|
||||
+ struct list_head any_waiters;
|
||||
+};
|
||||
+
|
||||
+struct ntsync_q_entry {
|
||||
+ struct list_head node;
|
||||
+ struct ntsync_q *q;
|
||||
+ struct ntsync_obj *obj;
|
||||
+ __u32 index;
|
||||
+};
|
||||
+
|
||||
+struct ntsync_q {
|
||||
+ struct task_struct *task;
|
||||
+
|
||||
+ /*
|
||||
+ * Protected via atomic_try_cmpxchg(). Only the thread that wins the
|
||||
+ * compare-and-swap may actually change object states and wake this
|
||||
+ * task.
|
||||
+ */
|
||||
+ atomic_t signaled;
|
||||
+
|
||||
+ __u32 count;
|
||||
+ struct ntsync_q_entry entries[];
|
||||
};
|
||||
|
||||
struct ntsync_device {
|
||||
struct file *file;
|
||||
};
|
||||
|
||||
+static void try_wake_any_sem(struct ntsync_obj *sem)
|
||||
+{
|
||||
+ struct ntsync_q_entry *entry;
|
||||
+
|
||||
+ lockdep_assert_held(&sem->lock);
|
||||
+
|
||||
+ list_for_each_entry(entry, &sem->any_waiters, node) {
|
||||
+ struct ntsync_q *q = entry->q;
|
||||
+ int signaled = -1;
|
||||
+
|
||||
+ if (!sem->u.sem.count)
|
||||
+ break;
|
||||
+
|
||||
+ if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
|
||||
+ sem->u.sem.count--;
|
||||
+ wake_up_process(q->task);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Actually change the semaphore state, returning -EOVERFLOW if it is made
|
||||
* invalid.
|
||||
@@ -88,6 +138,8 @@ static int ntsync_sem_post(struct ntsync
|
||||
|
||||
prev_count = sem->u.sem.count;
|
||||
ret = post_sem_state(sem, args);
|
||||
+ if (!ret)
|
||||
+ try_wake_any_sem(sem);
|
||||
|
||||
spin_unlock(&sem->lock);
|
||||
|
||||
@@ -141,6 +193,7 @@ static struct ntsync_obj *ntsync_alloc_o
|
||||
obj->dev = dev;
|
||||
get_file(dev->file);
|
||||
spin_lock_init(&obj->lock);
|
||||
+ INIT_LIST_HEAD(&obj->any_waiters);
|
||||
|
||||
return obj;
|
||||
}
|
||||
@@ -191,6 +244,196 @@ static int ntsync_create_sem(struct ntsy
|
||||
return put_user(fd, &user_args->sem);
|
||||
}
|
||||
|
||||
+static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
|
||||
+{
|
||||
+ struct file *file = fget(fd);
|
||||
+ struct ntsync_obj *obj;
|
||||
+
|
||||
+ if (!file)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (file->f_op != &ntsync_obj_fops) {
|
||||
+ fput(file);
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ obj = file->private_data;
|
||||
+ if (obj->dev != dev) {
|
||||
+ fput(file);
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ return obj;
|
||||
+}
|
||||
+
|
||||
+static void put_obj(struct ntsync_obj *obj)
|
||||
+{
|
||||
+ fput(obj->file);
|
||||
+}
|
||||
+
|
||||
+static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args)
|
||||
+{
|
||||
+ ktime_t timeout = ns_to_ktime(args->timeout);
|
||||
+ clockid_t clock = CLOCK_MONOTONIC;
|
||||
+ ktime_t *timeout_ptr;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout);
|
||||
+
|
||||
+ if (args->flags & NTSYNC_WAIT_REALTIME)
|
||||
+ clock = CLOCK_REALTIME;
|
||||
+
|
||||
+ do {
|
||||
+ if (signal_pending(current)) {
|
||||
+ ret = -ERESTARTSYS;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+ if (atomic_read(&q->signaled) != -1) {
|
||||
+ ret = 0;
|
||||
+ break;
|
||||
+ }
|
||||
+ ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock);
|
||||
+ } while (ret < 0);
|
||||
+ __set_current_state(TASK_RUNNING);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Allocate and initialize the ntsync_q structure, but do not queue us yet.
|
||||
+ */
|
||||
+static int setup_wait(struct ntsync_device *dev,
|
||||
+ const struct ntsync_wait_args *args,
|
||||
+ struct ntsync_q **ret_q)
|
||||
+{
|
||||
+ const __u32 count = args->count;
|
||||
+ int fds[NTSYNC_MAX_WAIT_COUNT];
|
||||
+ struct ntsync_q *q;
|
||||
+ __u32 i, j;
|
||||
+
|
||||
+ if (args->pad[0] || args->pad[1] || args->pad[2] || (args->flags & ~NTSYNC_WAIT_REALTIME))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (args->count > NTSYNC_MAX_WAIT_COUNT)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (copy_from_user(fds, u64_to_user_ptr(args->objs),
|
||||
+ array_size(count, sizeof(*fds))))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ q = kmalloc(struct_size(q, entries, count), GFP_KERNEL);
|
||||
+ if (!q)
|
||||
+ return -ENOMEM;
|
||||
+ q->task = current;
|
||||
+ atomic_set(&q->signaled, -1);
|
||||
+ q->count = count;
|
||||
+
|
||||
+ for (i = 0; i < count; i++) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[i];
|
||||
+ struct ntsync_obj *obj = get_obj(dev, fds[i]);
|
||||
+
|
||||
+ if (!obj)
|
||||
+ goto err;
|
||||
+
|
||||
+ entry->obj = obj;
|
||||
+ entry->q = q;
|
||||
+ entry->index = i;
|
||||
+ }
|
||||
+
|
||||
+ *ret_q = q;
|
||||
+ return 0;
|
||||
+
|
||||
+err:
|
||||
+ for (j = 0; j < i; j++)
|
||||
+ put_obj(q->entries[j].obj);
|
||||
+ kfree(q);
|
||||
+ return -EINVAL;
|
||||
+}
|
||||
+
|
||||
+static void try_wake_any_obj(struct ntsync_obj *obj)
|
||||
+{
|
||||
+ switch (obj->type) {
|
||||
+ case NTSYNC_TYPE_SEM:
|
||||
+ try_wake_any_sem(obj);
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_wait_args args;
|
||||
+ struct ntsync_q *q;
|
||||
+ int signaled;
|
||||
+ __u32 i;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (copy_from_user(&args, argp, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ ret = setup_wait(dev, &args, &q);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* queue ourselves */
|
||||
+
|
||||
+ for (i = 0; i < args.count; i++) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[i];
|
||||
+ struct ntsync_obj *obj = entry->obj;
|
||||
+
|
||||
+ spin_lock(&obj->lock);
|
||||
+ list_add_tail(&entry->node, &obj->any_waiters);
|
||||
+ spin_unlock(&obj->lock);
|
||||
+ }
|
||||
+
|
||||
+ /* check if we are already signaled */
|
||||
+
|
||||
+ for (i = 0; i < args.count; i++) {
|
||||
+ struct ntsync_obj *obj = q->entries[i].obj;
|
||||
+
|
||||
+ if (atomic_read(&q->signaled) != -1)
|
||||
+ break;
|
||||
+
|
||||
+ spin_lock(&obj->lock);
|
||||
+ try_wake_any_obj(obj);
|
||||
+ spin_unlock(&obj->lock);
|
||||
+ }
|
||||
+
|
||||
+ /* sleep */
|
||||
+
|
||||
+ ret = ntsync_schedule(q, &args);
|
||||
+
|
||||
+ /* and finally, unqueue */
|
||||
+
|
||||
+ for (i = 0; i < args.count; i++) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[i];
|
||||
+ struct ntsync_obj *obj = entry->obj;
|
||||
+
|
||||
+ spin_lock(&obj->lock);
|
||||
+ list_del(&entry->node);
|
||||
+ spin_unlock(&obj->lock);
|
||||
+
|
||||
+ put_obj(obj);
|
||||
+ }
|
||||
+
|
||||
+ signaled = atomic_read(&q->signaled);
|
||||
+ if (signaled != -1) {
|
||||
+ struct ntsync_wait_args __user *user_args = argp;
|
||||
+
|
||||
+ /* even if we caught a signal, we need to communicate success */
|
||||
+ ret = 0;
|
||||
+
|
||||
+ if (put_user(signaled, &user_args->index))
|
||||
+ ret = -EFAULT;
|
||||
+ } else if (!ret) {
|
||||
+ ret = -ETIMEDOUT;
|
||||
+ }
|
||||
+
|
||||
+ kfree(q);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int ntsync_char_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_device *dev;
|
||||
@@ -222,6 +465,8 @@ static long ntsync_char_ioctl(struct fil
|
||||
switch (cmd) {
|
||||
case NTSYNC_IOC_CREATE_SEM:
|
||||
return ntsync_create_sem(dev, argp);
|
||||
+ case NTSYNC_IOC_WAIT_ANY:
|
||||
+ return ntsync_wait_any(dev, argp);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -16,7 +16,21 @@ struct ntsync_sem_args {
|
||||
__u32 max;
|
||||
};
|
||||
|
||||
+#define NTSYNC_WAIT_REALTIME 0x1
|
||||
+
|
||||
+struct ntsync_wait_args {
|
||||
+ __u64 timeout;
|
||||
+ __u64 objs;
|
||||
+ __u32 count;
|
||||
+ __u32 index;
|
||||
+ __u32 flags;
|
||||
+ __u32 pad[3];
|
||||
+};
|
||||
+
|
||||
+#define NTSYNC_MAX_WAIT_COUNT 64
|
||||
+
|
||||
#define NTSYNC_IOC_CREATE_SEM _IOWR('N', 0x80, struct ntsync_sem_args)
|
||||
+#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args)
|
||||
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
|
532
debian/patches/misc-ntsync5/0002-ntsync-Introduce-NTSYNC_IOC_WAIT_ALL.patch
vendored
Normal file
532
debian/patches/misc-ntsync5/0002-ntsync-Introduce-NTSYNC_IOC_WAIT_ALL.patch
vendored
Normal file
@ -0,0 +1,532 @@
|
||||
From 73fc33606fcb7028ec1ee6027a361de4e85ab5d6 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:28 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_WAIT_ALL.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This is similar to NTSYNC_IOC_WAIT_ANY, but waits until all of the objects are
|
||||
simultaneously signaled, and then acquires all of them as a single atomic
|
||||
operation.
|
||||
|
||||
Because acquisition of multiple objects is atomic, some complex locking is
|
||||
required. We cannot simply spin-lock multiple objects simultaneously, as that
|
||||
may disable preëmption for a problematically long time.
|
||||
|
||||
Instead, modifying any object which may be involved in a wait-all operation takes
|
||||
a device-wide sleeping mutex, "wait_all_lock", instead of the normal object
|
||||
spinlock.
|
||||
|
||||
Because wait-for-all is a rare operation, in order to optimize wait-for-any,
|
||||
this lock is only taken when necessary. "all_hint" is used to mark objects which
|
||||
are involved in a wait-for-all operation, and if an object is not, only its
|
||||
spinlock is taken.
|
||||
|
||||
The locking scheme used here was written by Peter Zijlstra.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 334 ++++++++++++++++++++++++++++++++++--
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 322 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/module.h>
|
||||
+#include <linux/mutex.h>
|
||||
#include <linux/overflow.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/signal.h>
|
||||
@@ -41,6 +42,7 @@ enum ntsync_type {
|
||||
|
||||
struct ntsync_obj {
|
||||
spinlock_t lock;
|
||||
+ int dev_locked;
|
||||
|
||||
enum ntsync_type type;
|
||||
|
||||
@@ -55,7 +57,30 @@ struct ntsync_obj {
|
||||
} sem;
|
||||
} u;
|
||||
|
||||
+ /*
|
||||
+ * any_waiters is protected by the object lock, but all_waiters is
|
||||
+ * protected by the device wait_all_lock.
|
||||
+ */
|
||||
struct list_head any_waiters;
|
||||
+ struct list_head all_waiters;
|
||||
+
|
||||
+ /*
|
||||
+ * Hint describing how many tasks are queued on this object in a
|
||||
+ * wait-all operation.
|
||||
+ *
|
||||
+ * Any time we do a wake, we may need to wake "all" waiters as well as
|
||||
+ * "any" waiters. In order to atomically wake "all" waiters, we must
|
||||
+ * lock all of the objects, and that means grabbing the wait_all_lock
|
||||
+ * below (and, due to lock ordering rules, before locking this object).
|
||||
+ * However, wait-all is a rare operation, and grabbing the wait-all
|
||||
+ * lock for every wake would create unnecessary contention.
|
||||
+ * Therefore we first check whether all_hint is zero, and, if it is,
|
||||
+ * we skip trying to wake "all" waiters.
|
||||
+ *
|
||||
+ * Since wait requests must originate from user-space threads, we're
|
||||
+ * limited here by PID_MAX_LIMIT, so there's no risk of overflow.
|
||||
+ */
|
||||
+ atomic_t all_hint;
|
||||
};
|
||||
|
||||
struct ntsync_q_entry {
|
||||
@@ -75,19 +100,198 @@ struct ntsync_q {
|
||||
*/
|
||||
atomic_t signaled;
|
||||
|
||||
+ bool all;
|
||||
__u32 count;
|
||||
struct ntsync_q_entry entries[];
|
||||
};
|
||||
|
||||
struct ntsync_device {
|
||||
+ /*
|
||||
+ * Wait-all operations must atomically grab all objects, and be totally
|
||||
+ * ordered with respect to each other and wait-any operations.
|
||||
+ * If one thread is trying to acquire several objects, another thread
|
||||
+ * cannot touch the object at the same time.
|
||||
+ *
|
||||
+ * This device-wide lock is used to serialize wait-for-all
|
||||
+ * operations, and operations on an object that is involved in a
|
||||
+ * wait-for-all.
|
||||
+ */
|
||||
+ struct mutex wait_all_lock;
|
||||
+
|
||||
struct file *file;
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * Single objects are locked using obj->lock.
|
||||
+ *
|
||||
+ * Multiple objects are 'locked' while holding dev->wait_all_lock.
|
||||
+ * In this case however, individual objects are not locked by holding
|
||||
+ * obj->lock, but by setting obj->dev_locked.
|
||||
+ *
|
||||
+ * This means that in order to lock a single object, the sequence is slightly
|
||||
+ * more complicated than usual. Specifically it needs to check obj->dev_locked
|
||||
+ * after acquiring obj->lock; if set, it needs to drop the lock and acquire
|
||||
+ * dev->wait_all_lock in order to serialize against the multi-object operation.
|
||||
+ */
|
||||
+
|
||||
+static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
|
||||
+{
|
||||
+ lockdep_assert_held(&dev->wait_all_lock);
|
||||
+ lockdep_assert(obj->dev == dev);
|
||||
+ spin_lock(&obj->lock);
|
||||
+ /*
|
||||
+ * By setting obj->dev_locked inside obj->lock, it is ensured that
|
||||
+ * anyone holding obj->lock must see the value.
|
||||
+ */
|
||||
+ obj->dev_locked = 1;
|
||||
+ spin_unlock(&obj->lock);
|
||||
+}
|
||||
+
|
||||
+static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
|
||||
+{
|
||||
+ lockdep_assert_held(&dev->wait_all_lock);
|
||||
+ lockdep_assert(obj->dev == dev);
|
||||
+ spin_lock(&obj->lock);
|
||||
+ obj->dev_locked = 0;
|
||||
+ spin_unlock(&obj->lock);
|
||||
+}
|
||||
+
|
||||
+static void obj_lock(struct ntsync_obj *obj)
|
||||
+{
|
||||
+ struct ntsync_device *dev = obj->dev;
|
||||
+
|
||||
+ for (;;) {
|
||||
+ spin_lock(&obj->lock);
|
||||
+ if (likely(!obj->dev_locked))
|
||||
+ break;
|
||||
+
|
||||
+ spin_unlock(&obj->lock);
|
||||
+ mutex_lock(&dev->wait_all_lock);
|
||||
+ spin_lock(&obj->lock);
|
||||
+ /*
|
||||
+ * obj->dev_locked should be set and released under the same
|
||||
+ * wait_all_lock section, since we now own this lock, it should
|
||||
+ * be clear.
|
||||
+ */
|
||||
+ lockdep_assert(!obj->dev_locked);
|
||||
+ spin_unlock(&obj->lock);
|
||||
+ mutex_unlock(&dev->wait_all_lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void obj_unlock(struct ntsync_obj *obj)
|
||||
+{
|
||||
+ spin_unlock(&obj->lock);
|
||||
+}
|
||||
+
|
||||
+static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
|
||||
+{
|
||||
+ bool all;
|
||||
+
|
||||
+ obj_lock(obj);
|
||||
+ all = atomic_read(&obj->all_hint);
|
||||
+ if (unlikely(all)) {
|
||||
+ obj_unlock(obj);
|
||||
+ mutex_lock(&dev->wait_all_lock);
|
||||
+ dev_lock_obj(dev, obj);
|
||||
+ }
|
||||
+
|
||||
+ return all;
|
||||
+}
|
||||
+
|
||||
+static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all)
|
||||
+{
|
||||
+ if (all) {
|
||||
+ dev_unlock_obj(dev, obj);
|
||||
+ mutex_unlock(&dev->wait_all_lock);
|
||||
+ } else {
|
||||
+ obj_unlock(obj);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define ntsync_assert_held(obj) \
|
||||
+ lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \
|
||||
+ ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
|
||||
+ (obj)->dev_locked))
|
||||
+
|
||||
+static bool is_signaled(struct ntsync_obj *obj)
|
||||
+{
|
||||
+ ntsync_assert_held(obj);
|
||||
+
|
||||
+ switch (obj->type) {
|
||||
+ case NTSYNC_TYPE_SEM:
|
||||
+ return !!obj->u.sem.count;
|
||||
+ }
|
||||
+
|
||||
+ WARN(1, "bad object type %#x\n", obj->type);
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * "locked_obj" is an optional pointer to an object which is already locked and
|
||||
+ * should not be locked again. This is necessary so that changing an object's
|
||||
+ * state and waking it can be a single atomic operation.
|
||||
+ */
|
||||
+static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
|
||||
+ struct ntsync_obj *locked_obj)
|
||||
+{
|
||||
+ __u32 count = q->count;
|
||||
+ bool can_wake = true;
|
||||
+ int signaled = -1;
|
||||
+ __u32 i;
|
||||
+
|
||||
+ lockdep_assert_held(&dev->wait_all_lock);
|
||||
+ if (locked_obj)
|
||||
+ lockdep_assert(locked_obj->dev_locked);
|
||||
+
|
||||
+ for (i = 0; i < count; i++) {
|
||||
+ if (q->entries[i].obj != locked_obj)
|
||||
+ dev_lock_obj(dev, q->entries[i].obj);
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < count; i++) {
|
||||
+ if (!is_signaled(q->entries[i].obj)) {
|
||||
+ can_wake = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) {
|
||||
+ for (i = 0; i < count; i++) {
|
||||
+ struct ntsync_obj *obj = q->entries[i].obj;
|
||||
+
|
||||
+ switch (obj->type) {
|
||||
+ case NTSYNC_TYPE_SEM:
|
||||
+ obj->u.sem.count--;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ wake_up_process(q->task);
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < count; i++) {
|
||||
+ if (q->entries[i].obj != locked_obj)
|
||||
+ dev_unlock_obj(dev, q->entries[i].obj);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
|
||||
+{
|
||||
+ struct ntsync_q_entry *entry;
|
||||
+
|
||||
+ lockdep_assert_held(&dev->wait_all_lock);
|
||||
+ lockdep_assert(obj->dev_locked);
|
||||
+
|
||||
+ list_for_each_entry(entry, &obj->all_waiters, node)
|
||||
+ try_wake_all(dev, entry->q, obj);
|
||||
+}
|
||||
+
|
||||
static void try_wake_any_sem(struct ntsync_obj *sem)
|
||||
{
|
||||
struct ntsync_q_entry *entry;
|
||||
|
||||
- lockdep_assert_held(&sem->lock);
|
||||
+ ntsync_assert_held(sem);
|
||||
+ lockdep_assert(sem->type == NTSYNC_TYPE_SEM);
|
||||
|
||||
list_for_each_entry(entry, &sem->any_waiters, node) {
|
||||
struct ntsync_q *q = entry->q;
|
||||
@@ -111,7 +315,7 @@ static int post_sem_state(struct ntsync_
|
||||
{
|
||||
__u32 sum;
|
||||
|
||||
- lockdep_assert_held(&sem->lock);
|
||||
+ ntsync_assert_held(sem);
|
||||
|
||||
if (check_add_overflow(sem->u.sem.count, count, &sum) ||
|
||||
sum > sem->u.sem.max)
|
||||
@@ -123,9 +327,11 @@ static int post_sem_state(struct ntsync_
|
||||
|
||||
static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
|
||||
{
|
||||
+ struct ntsync_device *dev = sem->dev;
|
||||
__u32 __user *user_args = argp;
|
||||
__u32 prev_count;
|
||||
__u32 args;
|
||||
+ bool all;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
@@ -134,14 +340,17 @@ static int ntsync_sem_post(struct ntsync
|
||||
if (sem->type != NTSYNC_TYPE_SEM)
|
||||
return -EINVAL;
|
||||
|
||||
- spin_lock(&sem->lock);
|
||||
+ all = ntsync_lock_obj(dev, sem);
|
||||
|
||||
prev_count = sem->u.sem.count;
|
||||
ret = post_sem_state(sem, args);
|
||||
- if (!ret)
|
||||
+ if (!ret) {
|
||||
+ if (all)
|
||||
+ try_wake_all_obj(dev, sem);
|
||||
try_wake_any_sem(sem);
|
||||
+ }
|
||||
|
||||
- spin_unlock(&sem->lock);
|
||||
+ ntsync_unlock_obj(dev, sem, all);
|
||||
|
||||
if (!ret && put_user(prev_count, user_args))
|
||||
ret = -EFAULT;
|
||||
@@ -194,6 +403,8 @@ static struct ntsync_obj *ntsync_alloc_o
|
||||
get_file(dev->file);
|
||||
spin_lock_init(&obj->lock);
|
||||
INIT_LIST_HEAD(&obj->any_waiters);
|
||||
+ INIT_LIST_HEAD(&obj->all_waiters);
|
||||
+ atomic_set(&obj->all_hint, 0);
|
||||
|
||||
return obj;
|
||||
}
|
||||
@@ -305,7 +516,7 @@ static int ntsync_schedule(const struct
|
||||
* Allocate and initialize the ntsync_q structure, but do not queue us yet.
|
||||
*/
|
||||
static int setup_wait(struct ntsync_device *dev,
|
||||
- const struct ntsync_wait_args *args,
|
||||
+ const struct ntsync_wait_args *args, bool all,
|
||||
struct ntsync_q **ret_q)
|
||||
{
|
||||
const __u32 count = args->count;
|
||||
@@ -328,6 +539,7 @@ static int setup_wait(struct ntsync_devi
|
||||
return -ENOMEM;
|
||||
q->task = current;
|
||||
atomic_set(&q->signaled, -1);
|
||||
+ q->all = all;
|
||||
q->count = count;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
@@ -337,6 +549,16 @@ static int setup_wait(struct ntsync_devi
|
||||
if (!obj)
|
||||
goto err;
|
||||
|
||||
+ if (all) {
|
||||
+ /* Check that the objects are all distinct. */
|
||||
+ for (j = 0; j < i; j++) {
|
||||
+ if (obj == q->entries[j].obj) {
|
||||
+ put_obj(obj);
|
||||
+ goto err;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
entry->obj = obj;
|
||||
entry->q = q;
|
||||
entry->index = i;
|
||||
@@ -366,13 +588,14 @@ static int ntsync_wait_any(struct ntsync
|
||||
struct ntsync_wait_args args;
|
||||
struct ntsync_q *q;
|
||||
int signaled;
|
||||
+ bool all;
|
||||
__u32 i;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
return -EFAULT;
|
||||
|
||||
- ret = setup_wait(dev, &args, &q);
|
||||
+ ret = setup_wait(dev, &args, false, &q);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@@ -382,9 +605,9 @@ static int ntsync_wait_any(struct ntsync
|
||||
struct ntsync_q_entry *entry = &q->entries[i];
|
||||
struct ntsync_obj *obj = entry->obj;
|
||||
|
||||
- spin_lock(&obj->lock);
|
||||
+ all = ntsync_lock_obj(dev, obj);
|
||||
list_add_tail(&entry->node, &obj->any_waiters);
|
||||
- spin_unlock(&obj->lock);
|
||||
+ ntsync_unlock_obj(dev, obj, all);
|
||||
}
|
||||
|
||||
/* check if we are already signaled */
|
||||
@@ -395,9 +618,9 @@ static int ntsync_wait_any(struct ntsync
|
||||
if (atomic_read(&q->signaled) != -1)
|
||||
break;
|
||||
|
||||
- spin_lock(&obj->lock);
|
||||
+ all = ntsync_lock_obj(dev, obj);
|
||||
try_wake_any_obj(obj);
|
||||
- spin_unlock(&obj->lock);
|
||||
+ ntsync_unlock_obj(dev, obj, all);
|
||||
}
|
||||
|
||||
/* sleep */
|
||||
@@ -410,13 +633,94 @@ static int ntsync_wait_any(struct ntsync
|
||||
struct ntsync_q_entry *entry = &q->entries[i];
|
||||
struct ntsync_obj *obj = entry->obj;
|
||||
|
||||
- spin_lock(&obj->lock);
|
||||
+ all = ntsync_lock_obj(dev, obj);
|
||||
list_del(&entry->node);
|
||||
- spin_unlock(&obj->lock);
|
||||
+ ntsync_unlock_obj(dev, obj, all);
|
||||
+
|
||||
+ put_obj(obj);
|
||||
+ }
|
||||
+
|
||||
+ signaled = atomic_read(&q->signaled);
|
||||
+ if (signaled != -1) {
|
||||
+ struct ntsync_wait_args __user *user_args = argp;
|
||||
+
|
||||
+ /* even if we caught a signal, we need to communicate success */
|
||||
+ ret = 0;
|
||||
+
|
||||
+ if (put_user(signaled, &user_args->index))
|
||||
+ ret = -EFAULT;
|
||||
+ } else if (!ret) {
|
||||
+ ret = -ETIMEDOUT;
|
||||
+ }
|
||||
+
|
||||
+ kfree(q);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_wait_args args;
|
||||
+ struct ntsync_q *q;
|
||||
+ int signaled;
|
||||
+ __u32 i;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (copy_from_user(&args, argp, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ ret = setup_wait(dev, &args, true, &q);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* queue ourselves */
|
||||
+
|
||||
+ mutex_lock(&dev->wait_all_lock);
|
||||
+
|
||||
+ for (i = 0; i < args.count; i++) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[i];
|
||||
+ struct ntsync_obj *obj = entry->obj;
|
||||
+
|
||||
+ atomic_inc(&obj->all_hint);
|
||||
+
|
||||
+ /*
|
||||
+ * obj->all_waiters is protected by dev->wait_all_lock rather
|
||||
+ * than obj->lock, so there is no need to acquire obj->lock
|
||||
+ * here.
|
||||
+ */
|
||||
+ list_add_tail(&entry->node, &obj->all_waiters);
|
||||
+ }
|
||||
+
|
||||
+ /* check if we are already signaled */
|
||||
+
|
||||
+ try_wake_all(dev, q, NULL);
|
||||
+
|
||||
+ mutex_unlock(&dev->wait_all_lock);
|
||||
+
|
||||
+ /* sleep */
|
||||
+
|
||||
+ ret = ntsync_schedule(q, &args);
|
||||
+
|
||||
+ /* and finally, unqueue */
|
||||
+
|
||||
+ mutex_lock(&dev->wait_all_lock);
|
||||
+
|
||||
+ for (i = 0; i < args.count; i++) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[i];
|
||||
+ struct ntsync_obj *obj = entry->obj;
|
||||
+
|
||||
+ /*
|
||||
+ * obj->all_waiters is protected by dev->wait_all_lock rather
|
||||
+ * than obj->lock, so there is no need to acquire it here.
|
||||
+ */
|
||||
+ list_del(&entry->node);
|
||||
+
|
||||
+ atomic_dec(&obj->all_hint);
|
||||
|
||||
put_obj(obj);
|
||||
}
|
||||
|
||||
+ mutex_unlock(&dev->wait_all_lock);
|
||||
+
|
||||
signaled = atomic_read(&q->signaled);
|
||||
if (signaled != -1) {
|
||||
struct ntsync_wait_args __user *user_args = argp;
|
||||
@@ -442,6 +746,8 @@ static int ntsync_char_open(struct inode
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
+ mutex_init(&dev->wait_all_lock);
|
||||
+
|
||||
file->private_data = dev;
|
||||
dev->file = file;
|
||||
return nonseekable_open(inode, file);
|
||||
@@ -465,6 +771,8 @@ static long ntsync_char_ioctl(struct fil
|
||||
switch (cmd) {
|
||||
case NTSYNC_IOC_CREATE_SEM:
|
||||
return ntsync_create_sem(dev, argp);
|
||||
+ case NTSYNC_IOC_WAIT_ALL:
|
||||
+ return ntsync_wait_all(dev, argp);
|
||||
case NTSYNC_IOC_WAIT_ANY:
|
||||
return ntsync_wait_any(dev, argp);
|
||||
default:
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -31,6 +31,7 @@ struct ntsync_wait_args {
|
||||
|
||||
#define NTSYNC_IOC_CREATE_SEM _IOWR('N', 0x80, struct ntsync_sem_args)
|
||||
#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args)
|
||||
+#define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args)
|
||||
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
|
226
debian/patches/misc-ntsync5/0003-ntsync-Introduce-NTSYNC_IOC_CREATE_MUTEX.patch
vendored
Normal file
226
debian/patches/misc-ntsync5/0003-ntsync-Introduce-NTSYNC_IOC_CREATE_MUTEX.patch
vendored
Normal file
@ -0,0 +1,226 @@
|
||||
From fdeceab49078a80987c665ed837ee4f1b8a942a8 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:29 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_CREATE_MUTEX.
|
||||
|
||||
This corresponds to the NT syscall NtCreateMutant().
|
||||
|
||||
An NT mutex is recursive, with a 32-bit recursion counter. When acquired via
|
||||
NtWaitForMultipleObjects(), the recursion counter is incremented by one. The OS
|
||||
records the thread which acquired it.
|
||||
|
||||
However, in order to keep this
|
||||
driver self-contained, the owning thread ID is managed by user-space, and passed
|
||||
as a parameter to all relevant ioctls.
|
||||
|
||||
The initial owner and recursion count, if any, are specified when the mutex is
|
||||
created.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 77 +++++++++++++++++++++++++++++++++++--
|
||||
include/uapi/linux/ntsync.h | 10 ++++-
|
||||
2 files changed, 83 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
enum ntsync_type {
|
||||
NTSYNC_TYPE_SEM,
|
||||
+ NTSYNC_TYPE_MUTEX,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -55,6 +56,10 @@ struct ntsync_obj {
|
||||
__u32 count;
|
||||
__u32 max;
|
||||
} sem;
|
||||
+ struct {
|
||||
+ __u32 count;
|
||||
+ pid_t owner;
|
||||
+ } mutex;
|
||||
} u;
|
||||
|
||||
/*
|
||||
@@ -92,6 +97,7 @@ struct ntsync_q_entry {
|
||||
|
||||
struct ntsync_q {
|
||||
struct task_struct *task;
|
||||
+ __u32 owner;
|
||||
|
||||
/*
|
||||
* Protected via atomic_try_cmpxchg(). Only the thread that wins the
|
||||
@@ -214,13 +220,17 @@ static void ntsync_unlock_obj(struct nts
|
||||
((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
|
||||
(obj)->dev_locked))
|
||||
|
||||
-static bool is_signaled(struct ntsync_obj *obj)
|
||||
+static bool is_signaled(struct ntsync_obj *obj, __u32 owner)
|
||||
{
|
||||
ntsync_assert_held(obj);
|
||||
|
||||
switch (obj->type) {
|
||||
case NTSYNC_TYPE_SEM:
|
||||
return !!obj->u.sem.count;
|
||||
+ case NTSYNC_TYPE_MUTEX:
|
||||
+ if (obj->u.mutex.owner && obj->u.mutex.owner != owner)
|
||||
+ return false;
|
||||
+ return obj->u.mutex.count < UINT_MAX;
|
||||
}
|
||||
|
||||
WARN(1, "bad object type %#x\n", obj->type);
|
||||
@@ -250,7 +260,7 @@ static void try_wake_all(struct ntsync_d
|
||||
}
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
- if (!is_signaled(q->entries[i].obj)) {
|
||||
+ if (!is_signaled(q->entries[i].obj, q->owner)) {
|
||||
can_wake = false;
|
||||
break;
|
||||
}
|
||||
@@ -264,6 +274,10 @@ static void try_wake_all(struct ntsync_d
|
||||
case NTSYNC_TYPE_SEM:
|
||||
obj->u.sem.count--;
|
||||
break;
|
||||
+ case NTSYNC_TYPE_MUTEX:
|
||||
+ obj->u.mutex.count++;
|
||||
+ obj->u.mutex.owner = q->owner;
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
wake_up_process(q->task);
|
||||
@@ -307,6 +321,30 @@ static void try_wake_any_sem(struct ntsy
|
||||
}
|
||||
}
|
||||
|
||||
+static void try_wake_any_mutex(struct ntsync_obj *mutex)
|
||||
+{
|
||||
+ struct ntsync_q_entry *entry;
|
||||
+
|
||||
+ ntsync_assert_held(mutex);
|
||||
+ lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX);
|
||||
+
|
||||
+ list_for_each_entry(entry, &mutex->any_waiters, node) {
|
||||
+ struct ntsync_q *q = entry->q;
|
||||
+ int signaled = -1;
|
||||
+
|
||||
+ if (mutex->u.mutex.count == UINT_MAX)
|
||||
+ break;
|
||||
+ if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner)
|
||||
+ continue;
|
||||
+
|
||||
+ if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
|
||||
+ mutex->u.mutex.count++;
|
||||
+ mutex->u.mutex.owner = q->owner;
|
||||
+ wake_up_process(q->task);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Actually change the semaphore state, returning -EOVERFLOW if it is made
|
||||
* invalid.
|
||||
@@ -455,6 +493,33 @@ static int ntsync_create_sem(struct ntsy
|
||||
return put_user(fd, &user_args->sem);
|
||||
}
|
||||
|
||||
+static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_mutex_args __user *user_args = argp;
|
||||
+ struct ntsync_mutex_args args;
|
||||
+ struct ntsync_obj *mutex;
|
||||
+ int fd;
|
||||
+
|
||||
+ if (copy_from_user(&args, argp, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ if (!args.owner != !args.count)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX);
|
||||
+ if (!mutex)
|
||||
+ return -ENOMEM;
|
||||
+ mutex->u.mutex.count = args.count;
|
||||
+ mutex->u.mutex.owner = args.owner;
|
||||
+ fd = ntsync_obj_get_fd(mutex);
|
||||
+ if (fd < 0) {
|
||||
+ kfree(mutex);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ return put_user(fd, &user_args->mutex);
|
||||
+}
|
||||
+
|
||||
static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
|
||||
{
|
||||
struct file *file = fget(fd);
|
||||
@@ -524,7 +589,7 @@ static int setup_wait(struct ntsync_devi
|
||||
struct ntsync_q *q;
|
||||
__u32 i, j;
|
||||
|
||||
- if (args->pad[0] || args->pad[1] || args->pad[2] || (args->flags & ~NTSYNC_WAIT_REALTIME))
|
||||
+ if (args->pad[0] || args->pad[1] || (args->flags & ~NTSYNC_WAIT_REALTIME))
|
||||
return -EINVAL;
|
||||
|
||||
if (args->count > NTSYNC_MAX_WAIT_COUNT)
|
||||
@@ -538,6 +603,7 @@ static int setup_wait(struct ntsync_devi
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
q->task = current;
|
||||
+ q->owner = args->owner;
|
||||
atomic_set(&q->signaled, -1);
|
||||
q->all = all;
|
||||
q->count = count;
|
||||
@@ -580,6 +646,9 @@ static void try_wake_any_obj(struct ntsy
|
||||
case NTSYNC_TYPE_SEM:
|
||||
try_wake_any_sem(obj);
|
||||
break;
|
||||
+ case NTSYNC_TYPE_MUTEX:
|
||||
+ try_wake_any_mutex(obj);
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -769,6 +838,8 @@ static long ntsync_char_ioctl(struct fil
|
||||
void __user *argp = (void __user *)parm;
|
||||
|
||||
switch (cmd) {
|
||||
+ case NTSYNC_IOC_CREATE_MUTEX:
|
||||
+ return ntsync_create_mutex(dev, argp);
|
||||
case NTSYNC_IOC_CREATE_SEM:
|
||||
return ntsync_create_sem(dev, argp);
|
||||
case NTSYNC_IOC_WAIT_ALL:
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -16,6 +16,12 @@ struct ntsync_sem_args {
|
||||
__u32 max;
|
||||
};
|
||||
|
||||
+struct ntsync_mutex_args {
|
||||
+ __u32 mutex;
|
||||
+ __u32 owner;
|
||||
+ __u32 count;
|
||||
+};
|
||||
+
|
||||
#define NTSYNC_WAIT_REALTIME 0x1
|
||||
|
||||
struct ntsync_wait_args {
|
||||
@@ -24,7 +30,8 @@ struct ntsync_wait_args {
|
||||
__u32 count;
|
||||
__u32 index;
|
||||
__u32 flags;
|
||||
- __u32 pad[3];
|
||||
+ __u32 owner;
|
||||
+ __u32 pad[2];
|
||||
};
|
||||
|
||||
#define NTSYNC_MAX_WAIT_COUNT 64
|
||||
@@ -32,6 +39,7 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_CREATE_SEM _IOWR('N', 0x80, struct ntsync_sem_args)
|
||||
#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args)
|
||||
#define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args)
|
||||
+#define NTSYNC_IOC_CREATE_MUTEX _IOWR('N', 0x84, struct ntsync_sem_args)
|
||||
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
|
95
debian/patches/misc-ntsync5/0004-ntsync-Introduce-NTSYNC_IOC_MUTEX_UNLOCK.patch
vendored
Normal file
95
debian/patches/misc-ntsync5/0004-ntsync-Introduce-NTSYNC_IOC_MUTEX_UNLOCK.patch
vendored
Normal file
@ -0,0 +1,95 @@
|
||||
From cc9ade623cd90cd002fb86f3aa249af2e6e4019e Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:30 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_UNLOCK.
|
||||
|
||||
This corresponds to the NT syscall NtReleaseMutant().
|
||||
|
||||
This syscall decrements the mutex's recursion count by one, and returns the
|
||||
previous value. If the mutex is not owned by the specified owner, the function
|
||||
instead fails and returns -EPERM.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 53 +++++++++++++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 54 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -396,6 +396,57 @@ static int ntsync_sem_post(struct ntsync
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Actually change the mutex state, returning -EPERM if not the owner.
|
||||
+ */
|
||||
+static int unlock_mutex_state(struct ntsync_obj *mutex,
|
||||
+ const struct ntsync_mutex_args *args)
|
||||
+{
|
||||
+ ntsync_assert_held(mutex);
|
||||
+
|
||||
+ if (mutex->u.mutex.owner != args->owner)
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ if (!--mutex->u.mutex.count)
|
||||
+ mutex->u.mutex.owner = 0;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_mutex_args __user *user_args = argp;
|
||||
+ struct ntsync_device *dev = mutex->dev;
|
||||
+ struct ntsync_mutex_args args;
|
||||
+ __u32 prev_count;
|
||||
+ bool all;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (copy_from_user(&args, argp, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+ if (!args.owner)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (mutex->type != NTSYNC_TYPE_MUTEX)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, mutex);
|
||||
+
|
||||
+ prev_count = mutex->u.mutex.count;
|
||||
+ ret = unlock_mutex_state(mutex, &args);
|
||||
+ if (!ret) {
|
||||
+ if (all)
|
||||
+ try_wake_all_obj(dev, mutex);
|
||||
+ try_wake_any_mutex(mutex);
|
||||
+ }
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, mutex, all);
|
||||
+
|
||||
+ if (!ret && put_user(prev_count, &user_args->count))
|
||||
+ ret = -EFAULT;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -415,6 +466,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
switch (cmd) {
|
||||
case NTSYNC_IOC_SEM_POST:
|
||||
return ntsync_sem_post(obj, argp);
|
||||
+ case NTSYNC_IOC_MUTEX_UNLOCK:
|
||||
+ return ntsync_mutex_unlock(obj, argp);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -42,5 +42,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_CREATE_MUTEX _IOWR('N', 0x84, struct ntsync_sem_args)
|
||||
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
+#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args)
|
||||
|
||||
#endif
|
156
debian/patches/misc-ntsync5/0005-ntsync-Introduce-NTSYNC_IOC_MUTEX_KILL.patch
vendored
Normal file
156
debian/patches/misc-ntsync5/0005-ntsync-Introduce-NTSYNC_IOC_MUTEX_KILL.patch
vendored
Normal file
@ -0,0 +1,156 @@
|
||||
From dca3fe766afa42e34f5d3f62c0f2850760663176 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:31 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_KILL.
|
||||
|
||||
This does not correspond to any NT syscall. Rather, when a thread dies, it
|
||||
should be called by the NT emulator for each mutex, with the TID of the dying
|
||||
thread.
|
||||
|
||||
NT mutexes are robust (in the pthread sense). When an NT thread dies, any
|
||||
mutexes it owned are immediately released. Acquisition of those mutexes by other
|
||||
threads will return a special value indicating that the mutex was abandoned,
|
||||
like EOWNERDEAD returned from pthread_mutex_lock(), and EOWNERDEAD is indeed
|
||||
used here for that purpose.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 61 +++++++++++++++++++++++++++++++++++--
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 60 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -59,6 +59,7 @@ struct ntsync_obj {
|
||||
struct {
|
||||
__u32 count;
|
||||
pid_t owner;
|
||||
+ bool ownerdead;
|
||||
} mutex;
|
||||
} u;
|
||||
|
||||
@@ -107,6 +108,7 @@ struct ntsync_q {
|
||||
atomic_t signaled;
|
||||
|
||||
bool all;
|
||||
+ bool ownerdead;
|
||||
__u32 count;
|
||||
struct ntsync_q_entry entries[];
|
||||
};
|
||||
@@ -275,6 +277,9 @@ static void try_wake_all(struct ntsync_d
|
||||
obj->u.sem.count--;
|
||||
break;
|
||||
case NTSYNC_TYPE_MUTEX:
|
||||
+ if (obj->u.mutex.ownerdead)
|
||||
+ q->ownerdead = true;
|
||||
+ obj->u.mutex.ownerdead = false;
|
||||
obj->u.mutex.count++;
|
||||
obj->u.mutex.owner = q->owner;
|
||||
break;
|
||||
@@ -338,6 +343,9 @@ static void try_wake_any_mutex(struct nt
|
||||
continue;
|
||||
|
||||
if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
|
||||
+ if (mutex->u.mutex.ownerdead)
|
||||
+ q->ownerdead = true;
|
||||
+ mutex->u.mutex.ownerdead = false;
|
||||
mutex->u.mutex.count++;
|
||||
mutex->u.mutex.owner = q->owner;
|
||||
wake_up_process(q->task);
|
||||
@@ -447,6 +455,52 @@ static int ntsync_mutex_unlock(struct nt
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Actually change the mutex state to mark its owner as dead,
|
||||
+ * returning -EPERM if not the owner.
|
||||
+ */
|
||||
+static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner)
|
||||
+{
|
||||
+ ntsync_assert_held(mutex);
|
||||
+
|
||||
+ if (mutex->u.mutex.owner != owner)
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ mutex->u.mutex.ownerdead = true;
|
||||
+ mutex->u.mutex.owner = 0;
|
||||
+ mutex->u.mutex.count = 0;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_device *dev = mutex->dev;
|
||||
+ __u32 owner;
|
||||
+ bool all;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (get_user(owner, (__u32 __user *)argp))
|
||||
+ return -EFAULT;
|
||||
+ if (!owner)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (mutex->type != NTSYNC_TYPE_MUTEX)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, mutex);
|
||||
+
|
||||
+ ret = kill_mutex_state(mutex, owner);
|
||||
+ if (!ret) {
|
||||
+ if (all)
|
||||
+ try_wake_all_obj(dev, mutex);
|
||||
+ try_wake_any_mutex(mutex);
|
||||
+ }
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, mutex, all);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -468,6 +522,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
return ntsync_sem_post(obj, argp);
|
||||
case NTSYNC_IOC_MUTEX_UNLOCK:
|
||||
return ntsync_mutex_unlock(obj, argp);
|
||||
+ case NTSYNC_IOC_MUTEX_KILL:
|
||||
+ return ntsync_mutex_kill(obj, argp);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
@@ -659,6 +715,7 @@ static int setup_wait(struct ntsync_devi
|
||||
q->owner = args->owner;
|
||||
atomic_set(&q->signaled, -1);
|
||||
q->all = all;
|
||||
+ q->ownerdead = false;
|
||||
q->count = count;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
@@ -767,7 +824,7 @@ static int ntsync_wait_any(struct ntsync
|
||||
struct ntsync_wait_args __user *user_args = argp;
|
||||
|
||||
/* even if we caught a signal, we need to communicate success */
|
||||
- ret = 0;
|
||||
+ ret = q->ownerdead ? -EOWNERDEAD : 0;
|
||||
|
||||
if (put_user(signaled, &user_args->index))
|
||||
ret = -EFAULT;
|
||||
@@ -848,7 +905,7 @@ static int ntsync_wait_all(struct ntsync
|
||||
struct ntsync_wait_args __user *user_args = argp;
|
||||
|
||||
/* even if we caught a signal, we need to communicate success */
|
||||
- ret = 0;
|
||||
+ ret = q->ownerdead ? -EOWNERDEAD : 0;
|
||||
|
||||
if (put_user(signaled, &user_args->index))
|
||||
ret = -EFAULT;
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -43,5 +43,6 @@ struct ntsync_wait_args {
|
||||
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args)
|
||||
+#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32)
|
||||
|
||||
#endif
|
166
debian/patches/misc-ntsync5/0006-ntsync-Introduce-NTSYNC_IOC_CREATE_EVENT.patch
vendored
Normal file
166
debian/patches/misc-ntsync5/0006-ntsync-Introduce-NTSYNC_IOC_CREATE_EVENT.patch
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
From 3f3bbc85f1e613364261d685b8197c32ffdeaad0 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:32 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_CREATE_EVENT.
|
||||
|
||||
This corresponds to the NT syscall NtCreateEvent().
|
||||
|
||||
An NT event holds a single bit of state denoting whether it is signaled or
|
||||
unsignaled.
|
||||
|
||||
There are two types of events: manual-reset and automatic-reset. When an
|
||||
automatic-reset event is acquired via a wait function, its state is reset to
|
||||
unsignaled. Manual-reset events are not affected by wait functions.
|
||||
|
||||
Whether the event is manual-reset, and its initial state, are specified at
|
||||
creation time.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 62 +++++++++++++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 7 +++++
|
||||
2 files changed, 69 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -26,6 +26,7 @@
|
||||
enum ntsync_type {
|
||||
NTSYNC_TYPE_SEM,
|
||||
NTSYNC_TYPE_MUTEX,
|
||||
+ NTSYNC_TYPE_EVENT,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -61,6 +62,10 @@ struct ntsync_obj {
|
||||
pid_t owner;
|
||||
bool ownerdead;
|
||||
} mutex;
|
||||
+ struct {
|
||||
+ bool manual;
|
||||
+ bool signaled;
|
||||
+ } event;
|
||||
} u;
|
||||
|
||||
/*
|
||||
@@ -233,6 +238,8 @@ static bool is_signaled(struct ntsync_ob
|
||||
if (obj->u.mutex.owner && obj->u.mutex.owner != owner)
|
||||
return false;
|
||||
return obj->u.mutex.count < UINT_MAX;
|
||||
+ case NTSYNC_TYPE_EVENT:
|
||||
+ return obj->u.event.signaled;
|
||||
}
|
||||
|
||||
WARN(1, "bad object type %#x\n", obj->type);
|
||||
@@ -283,6 +290,10 @@ static void try_wake_all(struct ntsync_d
|
||||
obj->u.mutex.count++;
|
||||
obj->u.mutex.owner = q->owner;
|
||||
break;
|
||||
+ case NTSYNC_TYPE_EVENT:
|
||||
+ if (!obj->u.event.manual)
|
||||
+ obj->u.event.signaled = false;
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
wake_up_process(q->task);
|
||||
@@ -353,6 +364,28 @@ static void try_wake_any_mutex(struct nt
|
||||
}
|
||||
}
|
||||
|
||||
+static void try_wake_any_event(struct ntsync_obj *event)
|
||||
+{
|
||||
+ struct ntsync_q_entry *entry;
|
||||
+
|
||||
+ ntsync_assert_held(event);
|
||||
+ lockdep_assert(event->type == NTSYNC_TYPE_EVENT);
|
||||
+
|
||||
+ list_for_each_entry(entry, &event->any_waiters, node) {
|
||||
+ struct ntsync_q *q = entry->q;
|
||||
+ int signaled = -1;
|
||||
+
|
||||
+ if (!event->u.event.signaled)
|
||||
+ break;
|
||||
+
|
||||
+ if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
|
||||
+ if (!event->u.event.manual)
|
||||
+ event->u.event.signaled = false;
|
||||
+ wake_up_process(q->task);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Actually change the semaphore state, returning -EOVERFLOW if it is made
|
||||
* invalid.
|
||||
@@ -629,6 +662,30 @@ static int ntsync_create_mutex(struct nt
|
||||
return put_user(fd, &user_args->mutex);
|
||||
}
|
||||
|
||||
+static int ntsync_create_event(struct ntsync_device *dev, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_event_args __user *user_args = argp;
|
||||
+ struct ntsync_event_args args;
|
||||
+ struct ntsync_obj *event;
|
||||
+ int fd;
|
||||
+
|
||||
+ if (copy_from_user(&args, argp, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ event = ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT);
|
||||
+ if (!event)
|
||||
+ return -ENOMEM;
|
||||
+ event->u.event.manual = args.manual;
|
||||
+ event->u.event.signaled = args.signaled;
|
||||
+ fd = ntsync_obj_get_fd(event);
|
||||
+ if (fd < 0) {
|
||||
+ kfree(event);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ return put_user(fd, &user_args->event);
|
||||
+}
|
||||
+
|
||||
static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
|
||||
{
|
||||
struct file *file = fget(fd);
|
||||
@@ -759,6 +816,9 @@ static void try_wake_any_obj(struct ntsy
|
||||
case NTSYNC_TYPE_MUTEX:
|
||||
try_wake_any_mutex(obj);
|
||||
break;
|
||||
+ case NTSYNC_TYPE_EVENT:
|
||||
+ try_wake_any_event(obj);
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -948,6 +1008,8 @@ static long ntsync_char_ioctl(struct fil
|
||||
void __user *argp = (void __user *)parm;
|
||||
|
||||
switch (cmd) {
|
||||
+ case NTSYNC_IOC_CREATE_EVENT:
|
||||
+ return ntsync_create_event(dev, argp);
|
||||
case NTSYNC_IOC_CREATE_MUTEX:
|
||||
return ntsync_create_mutex(dev, argp);
|
||||
case NTSYNC_IOC_CREATE_SEM:
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -22,6 +22,12 @@ struct ntsync_mutex_args {
|
||||
__u32 count;
|
||||
};
|
||||
|
||||
+struct ntsync_event_args {
|
||||
+ __u32 event;
|
||||
+ __u32 manual;
|
||||
+ __u32 signaled;
|
||||
+};
|
||||
+
|
||||
#define NTSYNC_WAIT_REALTIME 0x1
|
||||
|
||||
struct ntsync_wait_args {
|
||||
@@ -40,6 +46,7 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args)
|
||||
#define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args)
|
||||
#define NTSYNC_IOC_CREATE_MUTEX _IOWR('N', 0x84, struct ntsync_sem_args)
|
||||
+#define NTSYNC_IOC_CREATE_EVENT _IOWR('N', 0x87, struct ntsync_event_args)
|
||||
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args)
|
67
debian/patches/misc-ntsync5/0007-ntsync-Introduce-NTSYNC_IOC_EVENT_SET.patch
vendored
Normal file
67
debian/patches/misc-ntsync5/0007-ntsync-Introduce-NTSYNC_IOC_EVENT_SET.patch
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
From a6f107f17a976008b85c3e269bf4196e595d3f52 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:33 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_SET.
|
||||
|
||||
This corresponds to the NT syscall NtSetEvent().
|
||||
|
||||
This sets the event to the signaled state, and returns its previous state.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 27 +++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 28 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -534,6 +534,31 @@ static int ntsync_mutex_kill(struct ntsy
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int ntsync_event_set(struct ntsync_obj *event, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_device *dev = event->dev;
|
||||
+ __u32 prev_state;
|
||||
+ bool all;
|
||||
+
|
||||
+ if (event->type != NTSYNC_TYPE_EVENT)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, event);
|
||||
+
|
||||
+ prev_state = event->u.event.signaled;
|
||||
+ event->u.event.signaled = true;
|
||||
+ if (all)
|
||||
+ try_wake_all_obj(dev, event);
|
||||
+ try_wake_any_event(event);
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, event, all);
|
||||
+
|
||||
+ if (put_user(prev_state, (__u32 __user *)argp))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -557,6 +582,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
return ntsync_mutex_unlock(obj, argp);
|
||||
case NTSYNC_IOC_MUTEX_KILL:
|
||||
return ntsync_mutex_kill(obj, argp);
|
||||
+ case NTSYNC_IOC_EVENT_SET:
|
||||
+ return ntsync_event_set(obj, argp);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -51,5 +51,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32)
|
||||
#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args)
|
||||
#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32)
|
||||
+#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32)
|
||||
|
||||
#endif
|
64
debian/patches/misc-ntsync5/0008-ntsync-Introduce-NTSYNC_IOC_EVENT_RESET.patch
vendored
Normal file
64
debian/patches/misc-ntsync5/0008-ntsync-Introduce-NTSYNC_IOC_EVENT_RESET.patch
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
From aa3ebb5870eb9ed259aba2ed4e07e9993e6cd978 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:34 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_RESET.
|
||||
|
||||
This corresponds to the NT syscall NtResetEvent().
|
||||
|
||||
This sets the event to the unsignaled state, and returns its previous state.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 24 ++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 25 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -559,6 +559,28 @@ static int ntsync_event_set(struct ntsyn
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_device *dev = event->dev;
|
||||
+ __u32 prev_state;
|
||||
+ bool all;
|
||||
+
|
||||
+ if (event->type != NTSYNC_TYPE_EVENT)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, event);
|
||||
+
|
||||
+ prev_state = event->u.event.signaled;
|
||||
+ event->u.event.signaled = false;
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, event, all);
|
||||
+
|
||||
+ if (put_user(prev_state, (__u32 __user *)argp))
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -584,6 +606,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
return ntsync_mutex_kill(obj, argp);
|
||||
case NTSYNC_IOC_EVENT_SET:
|
||||
return ntsync_event_set(obj, argp);
|
||||
+ case NTSYNC_IOC_EVENT_RESET:
|
||||
+ return ntsync_event_reset(obj, argp);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -52,5 +52,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args)
|
||||
#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32)
|
||||
#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32)
|
||||
+#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32)
|
||||
|
||||
#endif
|
60
debian/patches/misc-ntsync5/0009-ntsync-Introduce-NTSYNC_IOC_EVENT_PULSE.patch
vendored
Normal file
60
debian/patches/misc-ntsync5/0009-ntsync-Introduce-NTSYNC_IOC_EVENT_PULSE.patch
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
From 99bca5d776a3011214041c42107a210fe315a35e Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:35 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_PULSE.
|
||||
|
||||
This corresponds to the NT syscall NtPulseEvent().
|
||||
|
||||
This wakes up any waiters as if the event had been set, but does not set the
|
||||
event, instead resetting it if it had been signalled. Thus, for a manual-reset
|
||||
event, all waiters are woken, whereas for an auto-reset event, at most one
|
||||
waiter is woken.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 8 ++++++--
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -534,7 +534,7 @@ static int ntsync_mutex_kill(struct ntsy
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int ntsync_event_set(struct ntsync_obj *event, void __user *argp)
|
||||
+static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse)
|
||||
{
|
||||
struct ntsync_device *dev = event->dev;
|
||||
__u32 prev_state;
|
||||
@@ -550,6 +550,8 @@ static int ntsync_event_set(struct ntsyn
|
||||
if (all)
|
||||
try_wake_all_obj(dev, event);
|
||||
try_wake_any_event(event);
|
||||
+ if (pulse)
|
||||
+ event->u.event.signaled = false;
|
||||
|
||||
ntsync_unlock_obj(dev, event, all);
|
||||
|
||||
@@ -605,9 +607,11 @@ static long ntsync_obj_ioctl(struct file
|
||||
case NTSYNC_IOC_MUTEX_KILL:
|
||||
return ntsync_mutex_kill(obj, argp);
|
||||
case NTSYNC_IOC_EVENT_SET:
|
||||
- return ntsync_event_set(obj, argp);
|
||||
+ return ntsync_event_set(obj, argp, false);
|
||||
case NTSYNC_IOC_EVENT_RESET:
|
||||
return ntsync_event_reset(obj, argp);
|
||||
+ case NTSYNC_IOC_EVENT_PULSE:
|
||||
+ return ntsync_event_set(obj, argp, true);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -53,5 +53,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32)
|
||||
#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32)
|
||||
#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32)
|
||||
+#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32)
|
||||
|
||||
#endif
|
66
debian/patches/misc-ntsync5/0010-ntsync-Introduce-NTSYNC_IOC_SEM_READ.patch
vendored
Normal file
66
debian/patches/misc-ntsync5/0010-ntsync-Introduce-NTSYNC_IOC_SEM_READ.patch
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
From 1ef0ea672662bd19e7c6a4eac1067d11e50844b2 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:36 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_SEM_READ.
|
||||
|
||||
This corresponds to the NT syscall NtQuerySemaphore().
|
||||
|
||||
This returns the current count and maximum count of the semaphore.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 26 ++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 27 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -583,6 +583,30 @@ static int ntsync_event_reset(struct nts
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_sem_args __user *user_args = argp;
|
||||
+ struct ntsync_device *dev = sem->dev;
|
||||
+ struct ntsync_sem_args args;
|
||||
+ bool all;
|
||||
+
|
||||
+ if (sem->type != NTSYNC_TYPE_SEM)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ args.sem = 0;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, sem);
|
||||
+
|
||||
+ args.count = sem->u.sem.count;
|
||||
+ args.max = sem->u.sem.max;
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, sem, all);
|
||||
+
|
||||
+ if (copy_to_user(user_args, &args, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -602,6 +626,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
switch (cmd) {
|
||||
case NTSYNC_IOC_SEM_POST:
|
||||
return ntsync_sem_post(obj, argp);
|
||||
+ case NTSYNC_IOC_SEM_READ:
|
||||
+ return ntsync_sem_read(obj, argp);
|
||||
case NTSYNC_IOC_MUTEX_UNLOCK:
|
||||
return ntsync_mutex_unlock(obj, argp);
|
||||
case NTSYNC_IOC_MUTEX_KILL:
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -54,5 +54,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32)
|
||||
#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32)
|
||||
#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32)
|
||||
+#define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args)
|
||||
|
||||
#endif
|
68
debian/patches/misc-ntsync5/0011-ntsync-Introduce-NTSYNC_IOC_MUTEX_READ.patch
vendored
Normal file
68
debian/patches/misc-ntsync5/0011-ntsync-Introduce-NTSYNC_IOC_MUTEX_READ.patch
vendored
Normal file
@ -0,0 +1,68 @@
|
||||
From 7891b7d15abd12975aebb955821fbc43353b45d6 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:37 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_READ.
|
||||
|
||||
This corresponds to the NT syscall NtQueryMutant().
|
||||
|
||||
This returns the recursion count, owner, and abandoned state of the mutex.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 28 ++++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 29 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -607,6 +607,32 @@ static int ntsync_sem_read(struct ntsync
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_mutex_args __user *user_args = argp;
|
||||
+ struct ntsync_device *dev = mutex->dev;
|
||||
+ struct ntsync_mutex_args args;
|
||||
+ bool all;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (mutex->type != NTSYNC_TYPE_MUTEX)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ args.mutex = 0;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, mutex);
|
||||
+
|
||||
+ args.count = mutex->u.mutex.count;
|
||||
+ args.owner = mutex->u.mutex.owner;
|
||||
+ ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0;
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, mutex, all);
|
||||
+
|
||||
+ if (copy_to_user(user_args, &args, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -632,6 +658,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
return ntsync_mutex_unlock(obj, argp);
|
||||
case NTSYNC_IOC_MUTEX_KILL:
|
||||
return ntsync_mutex_kill(obj, argp);
|
||||
+ case NTSYNC_IOC_MUTEX_READ:
|
||||
+ return ntsync_mutex_read(obj, argp);
|
||||
case NTSYNC_IOC_EVENT_SET:
|
||||
return ntsync_event_set(obj, argp, false);
|
||||
case NTSYNC_IOC_EVENT_RESET:
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -55,5 +55,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32)
|
||||
#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32)
|
||||
#define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args)
|
||||
+#define NTSYNC_IOC_MUTEX_READ _IOR ('N', 0x8c, struct ntsync_mutex_args)
|
||||
|
||||
#endif
|
66
debian/patches/misc-ntsync5/0012-ntsync-Introduce-NTSYNC_IOC_EVENT_READ.patch
vendored
Normal file
66
debian/patches/misc-ntsync5/0012-ntsync-Introduce-NTSYNC_IOC_EVENT_READ.patch
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
From 35ff252f99aa4002e0c2ecef37314a422969791b Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:38 -0500
|
||||
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_READ.
|
||||
|
||||
This corresponds to the NT syscall NtQueryEvent().
|
||||
|
||||
This returns the signaled state of the event and whether it is manual-reset.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 26 ++++++++++++++++++++++++++
|
||||
include/uapi/linux/ntsync.h | 1 +
|
||||
2 files changed, 27 insertions(+)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -633,6 +633,30 @@ static int ntsync_mutex_read(struct ntsy
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int ntsync_event_read(struct ntsync_obj *event, void __user *argp)
|
||||
+{
|
||||
+ struct ntsync_event_args __user *user_args = argp;
|
||||
+ struct ntsync_device *dev = event->dev;
|
||||
+ struct ntsync_event_args args;
|
||||
+ bool all;
|
||||
+
|
||||
+ if (event->type != NTSYNC_TYPE_EVENT)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ args.event = 0;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, event);
|
||||
+
|
||||
+ args.manual = event->u.event.manual;
|
||||
+ args.signaled = event->u.event.signaled;
|
||||
+
|
||||
+ ntsync_unlock_obj(dev, event, all);
|
||||
+
|
||||
+ if (copy_to_user(user_args, &args, sizeof(args)))
|
||||
+ return -EFAULT;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int ntsync_obj_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ntsync_obj *obj = file->private_data;
|
||||
@@ -666,6 +690,8 @@ static long ntsync_obj_ioctl(struct file
|
||||
return ntsync_event_reset(obj, argp);
|
||||
case NTSYNC_IOC_EVENT_PULSE:
|
||||
return ntsync_event_set(obj, argp, true);
|
||||
+ case NTSYNC_IOC_EVENT_READ:
|
||||
+ return ntsync_event_read(obj, argp);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -56,5 +56,6 @@ struct ntsync_wait_args {
|
||||
#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32)
|
||||
#define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args)
|
||||
#define NTSYNC_IOC_MUTEX_READ _IOR ('N', 0x8c, struct ntsync_mutex_args)
|
||||
+#define NTSYNC_IOC_EVENT_READ _IOR ('N', 0x8d, struct ntsync_event_args)
|
||||
|
||||
#endif
|
187
debian/patches/misc-ntsync5/0013-ntsync-Introduce-alertable-waits.patch
vendored
Normal file
187
debian/patches/misc-ntsync5/0013-ntsync-Introduce-alertable-waits.patch
vendored
Normal file
@ -0,0 +1,187 @@
|
||||
From 2c391d57d1393cd46bf8bab08232ddc3dd32d5e5 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:39 -0500
|
||||
Subject: ntsync: Introduce alertable waits.
|
||||
|
||||
NT waits can optionally be made "alertable". This is a special channel for
|
||||
thread wakeup that is mildly similar to SIGIO. A thread has an internal single
|
||||
bit of "alerted" state, and if a thread is alerted while in an alertable wait, the
|
||||
wait will return a special value, consume the "alerted" state, and will not
|
||||
consume any of its objects.
|
||||
|
||||
Alerts are implemented using events; the user-space NT emulator is expected to
|
||||
create an internal ntsync event for each thread and pass that event to wait
|
||||
functions.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
drivers/misc/ntsync.c | 70 ++++++++++++++++++++++++++++++++-----
|
||||
include/uapi/linux/ntsync.h | 3 +-
|
||||
2 files changed, 63 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/drivers/misc/ntsync.c
|
||||
+++ b/drivers/misc/ntsync.c
|
||||
@@ -885,22 +885,29 @@ static int setup_wait(struct ntsync_devi
|
||||
const struct ntsync_wait_args *args, bool all,
|
||||
struct ntsync_q **ret_q)
|
||||
{
|
||||
+ int fds[NTSYNC_MAX_WAIT_COUNT + 1];
|
||||
const __u32 count = args->count;
|
||||
- int fds[NTSYNC_MAX_WAIT_COUNT];
|
||||
struct ntsync_q *q;
|
||||
+ __u32 total_count;
|
||||
__u32 i, j;
|
||||
|
||||
- if (args->pad[0] || args->pad[1] || (args->flags & ~NTSYNC_WAIT_REALTIME))
|
||||
+ if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME))
|
||||
return -EINVAL;
|
||||
|
||||
if (args->count > NTSYNC_MAX_WAIT_COUNT)
|
||||
return -EINVAL;
|
||||
|
||||
+ total_count = count;
|
||||
+ if (args->alert)
|
||||
+ total_count++;
|
||||
+
|
||||
if (copy_from_user(fds, u64_to_user_ptr(args->objs),
|
||||
array_size(count, sizeof(*fds))))
|
||||
return -EFAULT;
|
||||
+ if (args->alert)
|
||||
+ fds[count] = args->alert;
|
||||
|
||||
- q = kmalloc(struct_size(q, entries, count), GFP_KERNEL);
|
||||
+ q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL);
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
q->task = current;
|
||||
@@ -910,7 +917,7 @@ static int setup_wait(struct ntsync_devi
|
||||
q->ownerdead = false;
|
||||
q->count = count;
|
||||
|
||||
- for (i = 0; i < count; i++) {
|
||||
+ for (i = 0; i < total_count; i++) {
|
||||
struct ntsync_q_entry *entry = &q->entries[i];
|
||||
struct ntsync_obj *obj = get_obj(dev, fds[i]);
|
||||
|
||||
@@ -960,10 +967,10 @@ static void try_wake_any_obj(struct ntsy
|
||||
static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
|
||||
{
|
||||
struct ntsync_wait_args args;
|
||||
+ __u32 i, total_count;
|
||||
struct ntsync_q *q;
|
||||
int signaled;
|
||||
bool all;
|
||||
- __u32 i;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
@@ -973,9 +980,13 @@ static int ntsync_wait_any(struct ntsync
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
+ total_count = args.count;
|
||||
+ if (args.alert)
|
||||
+ total_count++;
|
||||
+
|
||||
/* queue ourselves */
|
||||
|
||||
- for (i = 0; i < args.count; i++) {
|
||||
+ for (i = 0; i < total_count; i++) {
|
||||
struct ntsync_q_entry *entry = &q->entries[i];
|
||||
struct ntsync_obj *obj = entry->obj;
|
||||
|
||||
@@ -984,9 +995,15 @@ static int ntsync_wait_any(struct ntsync
|
||||
ntsync_unlock_obj(dev, obj, all);
|
||||
}
|
||||
|
||||
- /* check if we are already signaled */
|
||||
+ /*
|
||||
+ * Check if we are already signaled.
|
||||
+ *
|
||||
+ * Note that the API requires that normal objects are checked before
|
||||
+ * the alert event. Hence we queue the alert event last, and check
|
||||
+ * objects in order.
|
||||
+ */
|
||||
|
||||
- for (i = 0; i < args.count; i++) {
|
||||
+ for (i = 0; i < total_count; i++) {
|
||||
struct ntsync_obj *obj = q->entries[i].obj;
|
||||
|
||||
if (atomic_read(&q->signaled) != -1)
|
||||
@@ -1003,7 +1020,7 @@ static int ntsync_wait_any(struct ntsync
|
||||
|
||||
/* and finally, unqueue */
|
||||
|
||||
- for (i = 0; i < args.count; i++) {
|
||||
+ for (i = 0; i < total_count; i++) {
|
||||
struct ntsync_q_entry *entry = &q->entries[i];
|
||||
struct ntsync_obj *obj = entry->obj;
|
||||
|
||||
@@ -1063,6 +1080,14 @@ static int ntsync_wait_all(struct ntsync
|
||||
*/
|
||||
list_add_tail(&entry->node, &obj->all_waiters);
|
||||
}
|
||||
+ if (args.alert) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[args.count];
|
||||
+ struct ntsync_obj *obj = entry->obj;
|
||||
+
|
||||
+ dev_lock_obj(dev, obj);
|
||||
+ list_add_tail(&entry->node, &obj->any_waiters);
|
||||
+ dev_unlock_obj(dev, obj);
|
||||
+ }
|
||||
|
||||
/* check if we are already signaled */
|
||||
|
||||
@@ -1070,6 +1095,21 @@ static int ntsync_wait_all(struct ntsync
|
||||
|
||||
mutex_unlock(&dev->wait_all_lock);
|
||||
|
||||
+ /*
|
||||
+ * Check if the alert event is signaled, making sure to do so only
|
||||
+ * after checking if the other objects are signaled.
|
||||
+ */
|
||||
+
|
||||
+ if (args.alert) {
|
||||
+ struct ntsync_obj *obj = q->entries[args.count].obj;
|
||||
+
|
||||
+ if (atomic_read(&q->signaled) == -1) {
|
||||
+ bool all = ntsync_lock_obj(dev, obj);
|
||||
+ try_wake_any_obj(obj);
|
||||
+ ntsync_unlock_obj(dev, obj, all);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* sleep */
|
||||
|
||||
ret = ntsync_schedule(q, &args);
|
||||
@@ -1095,6 +1135,18 @@ static int ntsync_wait_all(struct ntsync
|
||||
|
||||
mutex_unlock(&dev->wait_all_lock);
|
||||
|
||||
+ if (args.alert) {
|
||||
+ struct ntsync_q_entry *entry = &q->entries[args.count];
|
||||
+ struct ntsync_obj *obj = entry->obj;
|
||||
+ bool all;
|
||||
+
|
||||
+ all = ntsync_lock_obj(dev, obj);
|
||||
+ list_del(&entry->node);
|
||||
+ ntsync_unlock_obj(dev, obj, all);
|
||||
+
|
||||
+ put_obj(obj);
|
||||
+ }
|
||||
+
|
||||
signaled = atomic_read(&q->signaled);
|
||||
if (signaled != -1) {
|
||||
struct ntsync_wait_args __user *user_args = argp;
|
||||
--- a/include/uapi/linux/ntsync.h
|
||||
+++ b/include/uapi/linux/ntsync.h
|
||||
@@ -37,7 +37,8 @@ struct ntsync_wait_args {
|
||||
__u32 index;
|
||||
__u32 flags;
|
||||
__u32 owner;
|
||||
- __u32 pad[2];
|
||||
+ __u32 alert;
|
||||
+ __u32 pad;
|
||||
};
|
||||
|
||||
#define NTSYNC_MAX_WAIT_COUNT 64
|
30
debian/patches/misc-ntsync5/0014-maintainers-Add-an-entry-for-ntsync.patch
vendored
Normal file
30
debian/patches/misc-ntsync5/0014-maintainers-Add-an-entry-for-ntsync.patch
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
From 8d87043cd76368bb9996ba541d12e40cbb4201e5 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:52 -0500
|
||||
Subject: maintainers: Add an entry for ntsync.
|
||||
|
||||
Add myself as maintainer, supported by CodeWeavers.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
MAINTAINERS | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -16319,6 +16319,15 @@ T: git https://github.com/Paragon-Softwa
|
||||
F: Documentation/filesystems/ntfs3.rst
|
||||
F: fs/ntfs3/
|
||||
|
||||
+NTSYNC SYNCHRONIZATION PRIMITIVE DRIVER
|
||||
+M: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
+L: wine-devel@winehq.org
|
||||
+S: Supported
|
||||
+F: Documentation/userspace-api/ntsync.rst
|
||||
+F: drivers/misc/ntsync.c
|
||||
+F: include/uapi/linux/ntsync.h
|
||||
+F: tools/testing/selftests/drivers/ntsync/
|
||||
+
|
||||
NUBUS SUBSYSTEM
|
||||
M: Finn Thain <fthain@linux-m68k.org>
|
||||
L: linux-m68k@lists.linux-m68k.org
|
426
debian/patches/misc-ntsync5/0015-docs-ntsync-Add-documentation-for-the-ntsync-uAPI.patch
vendored
Normal file
426
debian/patches/misc-ntsync5/0015-docs-ntsync-Add-documentation-for-the-ntsync-uAPI.patch
vendored
Normal file
@ -0,0 +1,426 @@
|
||||
From 4cb25d42d38f1e0b144b084674591b70afa60bb0 Mon Sep 17 00:00:00 2001
|
||||
From: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
Date: Sun, 19 May 2024 15:24:53 -0500
|
||||
Subject: docs: ntsync: Add documentation for the ntsync uAPI.
|
||||
|
||||
Add an overall explanation of the driver architecture, and a complete and precise
|
||||
specification for its intended behaviour.
|
||||
|
||||
Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
|
||||
---
|
||||
Documentation/userspace-api/index.rst | 1 +
|
||||
Documentation/userspace-api/ntsync.rst | 398 +++++++++++++++++++++++++
|
||||
2 files changed, 399 insertions(+)
|
||||
create mode 100644 Documentation/userspace-api/ntsync.rst
|
||||
|
||||
--- a/Documentation/userspace-api/index.rst
|
||||
+++ b/Documentation/userspace-api/index.rst
|
||||
@@ -63,6 +63,7 @@ Everything else
|
||||
vduse
|
||||
futex2
|
||||
perf_ring_buffer
|
||||
+ ntsync
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/userspace-api/ntsync.rst
|
||||
@@ -0,0 +1,398 @@
|
||||
+===================================
|
||||
+NT synchronization primitive driver
|
||||
+===================================
|
||||
+
|
||||
+This page documents the user-space API for the ntsync driver.
|
||||
+
|
||||
+ntsync is a support driver for emulation of NT synchronization
|
||||
+primitives by user-space NT emulators. It exists because implementation
|
||||
+in user-space, using existing tools, cannot match Windows performance
|
||||
+while offering accurate semantics. It is implemented entirely in
|
||||
+software, and does not drive any hardware device.
|
||||
+
|
||||
+This interface is meant as a compatibility tool only, and should not
|
||||
+be used for general synchronization. Instead use generic, versatile
|
||||
+interfaces such as futex(2) and poll(2).
|
||||
+
|
||||
+Synchronization primitives
|
||||
+==========================
|
||||
+
|
||||
+The ntsync driver exposes three types of synchronization primitives:
|
||||
+semaphores, mutexes, and events.
|
||||
+
|
||||
+A semaphore holds a single volatile 32-bit counter, and a static 32-bit
|
||||
+integer denoting the maximum value. It is considered signaled (that is,
|
||||
+can be acquired without contention, or will wake up a waiting thread)
|
||||
+when the counter is nonzero. The counter is decremented by one when a
|
||||
+wait is satisfied. Both the initial and maximum count are established
|
||||
+when the semaphore is created.
|
||||
+
|
||||
+A mutex holds a volatile 32-bit recursion count, and a volatile 32-bit
|
||||
+identifier denoting its owner. A mutex is considered signaled when its
|
||||
+owner is zero (indicating that it is not owned). The recursion count is
|
||||
+incremented when a wait is satisfied, and ownership is set to the given
|
||||
+identifier.
|
||||
+
|
||||
+A mutex also holds an internal flag denoting whether its previous owner
|
||||
+has died; such a mutex is said to be abandoned. Owner death is not
|
||||
+tracked automatically based on thread death, but rather must be
|
||||
+communicated using ``NTSYNC_IOC_MUTEX_KILL``. An abandoned mutex is
|
||||
+inherently considered unowned.
|
||||
+
|
||||
+Except for the "unowned" semantics of zero, the actual value of the
|
||||
+owner identifier is not interpreted by the ntsync driver at all. The
|
||||
+intended use is to store a thread identifier; however, the ntsync
|
||||
+driver does not actually validate that a calling thread provides
|
||||
+consistent or unique identifiers.
|
||||
+
|
||||
+An event is similar to a semaphore with a maximum count of one. It holds
|
||||
+a volatile boolean state denoting whether it is signaled or not. There
|
||||
+are two types of events, auto-reset and manual-reset. An auto-reset
|
||||
+event is designaled when a wait is satisfied; a manual-reset event is
|
||||
+not. The event type is specified when the event is created.
|
||||
+
|
||||
+Unless specified otherwise, all operations on an object are atomic and
|
||||
+totally ordered with respect to other operations on the same object.
|
||||
+
|
||||
+Objects are represented by files. When all file descriptors to an
|
||||
+object are closed, that object is deleted.
|
||||
+
|
||||
+Char device
|
||||
+===========
|
||||
+
|
||||
+The ntsync driver creates a single char device /dev/ntsync. Each file
|
||||
+description opened on the device represents a unique instance intended
|
||||
+to back an individual NT virtual machine. Objects created by one ntsync
|
||||
+instance may only be used with other objects created by the same
|
||||
+instance.
|
||||
+
|
||||
+ioctl reference
|
||||
+===============
|
||||
+
|
||||
+All operations on the device are done through ioctls. There are four
|
||||
+structures used in ioctl calls::
|
||||
+
|
||||
+ struct ntsync_sem_args {
|
||||
+ __u32 sem;
|
||||
+ __u32 count;
|
||||
+ __u32 max;
|
||||
+ };
|
||||
+
|
||||
+ struct ntsync_mutex_args {
|
||||
+ __u32 mutex;
|
||||
+ __u32 owner;
|
||||
+ __u32 count;
|
||||
+ };
|
||||
+
|
||||
+ struct ntsync_event_args {
|
||||
+ __u32 event;
|
||||
+ __u32 signaled;
|
||||
+ __u32 manual;
|
||||
+ };
|
||||
+
|
||||
+ struct ntsync_wait_args {
|
||||
+ __u64 timeout;
|
||||
+ __u64 objs;
|
||||
+ __u32 count;
|
||||
+ __u32 index;
|
||||
+ __u32 flags;
|
||||
+ __u32 owner;
|
||||
+ __u32 alert;
|
||||
+ __u32 pad;
|
||||
+ };
|
||||
+
|
||||
+Depending on the ioctl, members of the structure may be used as input,
|
||||
+output, or not at all. All ioctls return 0 on success.
|
||||
+
|
||||
+The ioctls on the device file are as follows:
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_CREATE_SEM
|
||||
+
|
||||
+ Create a semaphore object. Takes a pointer to struct
|
||||
+ :c:type:`ntsync_sem_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``sem``
|
||||
+ - On output, contains a file descriptor to the created semaphore.
|
||||
+ * - ``count``
|
||||
+ - Initial count of the semaphore.
|
||||
+ * - ``max``
|
||||
+ - Maximum count of the semaphore.
|
||||
+
|
||||
+ Fails with ``EINVAL`` if ``count`` is greater than ``max``.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_CREATE_MUTEX
|
||||
+
|
||||
+ Create a mutex object. Takes a pointer to struct
|
||||
+ :c:type:`ntsync_mutex_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``mutex``
|
||||
+ - On output, contains a file descriptor to the created mutex.
|
||||
+ * - ``count``
|
||||
+ - Initial recursion count of the mutex.
|
||||
+ * - ``owner``
|
||||
+ - Initial owner of the mutex.
|
||||
+
|
||||
+ If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is
|
||||
+ zero and ``count`` is nonzero, the function fails with ``EINVAL``.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_CREATE_EVENT
|
||||
+
|
||||
+ Create an event object. Takes a pointer to struct
|
||||
+ :c:type:`ntsync_event_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``event``
|
||||
+ - On output, contains a file descriptor to the created event.
|
||||
+ * - ``signaled``
|
||||
+ - If nonzero, the event is initially signaled, otherwise
|
||||
+ nonsignaled.
|
||||
+ * - ``manual``
|
||||
+ - If nonzero, the event is a manual-reset event, otherwise
|
||||
+ auto-reset.
|
||||
+
|
||||
+The ioctls on the individual objects are as follows:
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_SEM_POST
|
||||
+
|
||||
+ Post to a semaphore object. Takes a pointer to a 32-bit integer,
|
||||
+ which on input holds the count to be added to the semaphore, and on
|
||||
+ output contains its previous count.
|
||||
+
|
||||
+ If adding to the semaphore's current count would raise the latter
|
||||
+ past the semaphore's maximum count, the ioctl fails with
|
||||
+ ``EOVERFLOW`` and the semaphore is not affected. If raising the
|
||||
+ semaphore's count causes it to become signaled, eligible threads
|
||||
+ waiting on this semaphore will be woken and the semaphore's count
|
||||
+ decremented appropriately.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_MUTEX_UNLOCK
|
||||
+
|
||||
+ Release a mutex object. Takes a pointer to struct
|
||||
+ :c:type:`ntsync_mutex_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``mutex``
|
||||
+ - Ignored.
|
||||
+ * - ``owner``
|
||||
+ - Specifies the owner trying to release this mutex.
|
||||
+ * - ``count``
|
||||
+ - On output, contains the previous recursion count.
|
||||
+
|
||||
+ If ``owner`` is zero, the ioctl fails with ``EINVAL``. If ``owner``
|
||||
+ is not the current owner of the mutex, the ioctl fails with
|
||||
+ ``EPERM``.
|
||||
+
|
||||
+ The mutex's count will be decremented by one. If decrementing the
|
||||
+ mutex's count causes it to become zero, the mutex is marked as
|
||||
+ unowned and signaled, and eligible threads waiting on it will be
|
||||
+ woken as appropriate.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_EVENT_SET
|
||||
+
|
||||
+ Signal an event object. Takes a pointer to a 32-bit integer, which on
|
||||
+ output contains the previous state of the event.
|
||||
+
|
||||
+ Eligible threads will be woken, and auto-reset events will be
|
||||
+ designaled appropriately.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_EVENT_RESET
|
||||
+
|
||||
+ Designal an event object. Takes a pointer to a 32-bit integer, which
|
||||
+ on output contains the previous state of the event.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_EVENT_PULSE
|
||||
+
|
||||
+ Wake threads waiting on an event object while leaving it in an
|
||||
+ unsignaled state. Takes a pointer to a 32-bit integer, which on
|
||||
+ output contains the previous state of the event.
|
||||
+
|
||||
+ A pulse operation can be thought of as a set followed by a reset,
|
||||
+ performed as a single atomic operation. If two threads are waiting on
|
||||
+ an auto-reset event which is pulsed, only one will be woken. If two
|
||||
+ threads are waiting on a manual-reset event which is pulsed, both will
|
||||
+ be woken. However, in both cases, the event will be unsignaled
|
||||
+ afterwards, and a simultaneous read operation will always report the
|
||||
+ event as unsignaled.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_SEM_READ
|
||||
+
|
||||
+ Read the current state of a semaphore object. Takes a pointer to
|
||||
+ struct :c:type:`ntsync_sem_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``sem``
|
||||
+ - Ignored.
|
||||
+ * - ``count``
|
||||
+ - On output, contains the current count of the semaphore.
|
||||
+ * - ``max``
|
||||
+ - On output, contains the maximum count of the semaphore.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_MUTEX_READ
|
||||
+
|
||||
+ Read the current state of a mutex object. Takes a pointer to struct
|
||||
+ :c:type:`ntsync_mutex_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``mutex``
|
||||
+ - Ignored.
|
||||
+ * - ``owner``
|
||||
+ - On output, contains the current owner of the mutex, or zero
|
||||
+ if the mutex is not currently owned.
|
||||
+ * - ``count``
|
||||
+ - On output, contains the current recursion count of the mutex.
|
||||
+
|
||||
+ If the mutex is marked as abandoned, the function fails with
|
||||
+ ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to
|
||||
+ zero.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_EVENT_READ
|
||||
+
|
||||
+ Read the current state of an event object. Takes a pointer to struct
|
||||
+ :c:type:`ntsync_event_args`, which is used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``event``
|
||||
+ - Ignored.
|
||||
+ * - ``signaled``
|
||||
+ - On output, contains the current state of the event.
|
||||
+ * - ``manual``
|
||||
+ - On output, contains 1 if the event is a manual-reset event,
|
||||
+ and 0 otherwise.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_MUTEX_KILL
|
||||
+
|
||||
+ Mark a mutex as unowned and abandoned if it is owned by the given
|
||||
+ owner. Takes an input-only pointer to a 32-bit integer denoting the
|
||||
+ owner. If the owner is zero, the ioctl fails with ``EINVAL``. If the
|
||||
+ owner does not own the mutex, the function fails with ``EPERM``.
|
||||
+
|
||||
+ Eligible threads waiting on the mutex will be woken as appropriate
|
||||
+ (and such waits will fail with ``EOWNERDEAD``, as described below).
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_WAIT_ANY
|
||||
+
|
||||
+ Poll on any of a list of objects, atomically acquiring at most one.
|
||||
+ Takes a pointer to struct :c:type:`ntsync_wait_args`, which is
|
||||
+ used as follows:
|
||||
+
|
||||
+ .. list-table::
|
||||
+
|
||||
+ * - ``timeout``
|
||||
+ - Absolute timeout in nanoseconds. If ``NTSYNC_WAIT_REALTIME``
|
||||
+ is set, the timeout is measured against the REALTIME clock;
|
||||
+ otherwise it is measured against the MONOTONIC clock. If the
|
||||
+ timeout is equal to or earlier than the current time, the
|
||||
+ function returns immediately without sleeping. If ``timeout``
|
||||
+ is U64_MAX, the function will sleep until an object is
|
||||
+ signaled, and will not fail with ``ETIMEDOUT``.
|
||||
+ * - ``objs``
|
||||
+ - Pointer to an array of ``count`` file descriptors
|
||||
+ (specified as an integer so that the structure has the same
|
||||
+ size regardless of architecture). If any object is
|
||||
+ invalid, the function fails with ``EINVAL``.
|
||||
+ * - ``count``
|
||||
+ - Number of objects specified in the ``objs`` array.
|
||||
+ If greater than ``NTSYNC_MAX_WAIT_COUNT``, the function fails
|
||||
+ with ``EINVAL``.
|
||||
+ * - ``owner``
|
||||
+ - Mutex owner identifier. If any object in ``objs`` is a mutex,
|
||||
+ the ioctl will attempt to acquire that mutex on behalf of
|
||||
+ ``owner``. If ``owner`` is zero, the ioctl fails with
|
||||
+ ``EINVAL``.
|
||||
+ * - ``index``
|
||||
+ - On success, contains the index (into ``objs``) of the object
|
||||
+ which was signaled. If ``alert`` was signaled instead,
|
||||
+ this contains ``count``.
|
||||
+ * - ``alert``
|
||||
+ - Optional event object file descriptor. If nonzero, this
|
||||
+ specifies an "alert" event object which, if signaled, will
|
||||
+ terminate the wait. If nonzero, the identifier must point to a
|
||||
+ valid event.
|
||||
+ * - ``flags``
|
||||
+ - Zero or more flags. Currently the only flag is
|
||||
+ ``NTSYNC_WAIT_REALTIME``, which causes the timeout to be
|
||||
+ measured against the REALTIME clock instead of MONOTONIC.
|
||||
+ * - ``pad``
|
||||
+ - Unused, must be set to zero.
|
||||
+
|
||||
+ This function attempts to acquire one of the given objects. If unable
|
||||
+ to do so, it sleeps until an object becomes signaled, subsequently
|
||||
+ acquiring it, or the timeout expires. In the latter case the ioctl
|
||||
+ fails with ``ETIMEDOUT``. The function only acquires one object, even
|
||||
+ if multiple objects are signaled.
|
||||
+
|
||||
+ A semaphore is considered to be signaled if its count is nonzero, and
|
||||
+ is acquired by decrementing its count by one. A mutex is considered
|
||||
+ to be signaled if it is unowned or if its owner matches the ``owner``
|
||||
+ argument, and is acquired by incrementing its recursion count by one
|
||||
+ and setting its owner to the ``owner`` argument. An auto-reset event
|
||||
+ is acquired by designaling it; a manual-reset event is not affected
|
||||
+ by acquisition.
|
||||
+
|
||||
+ Acquisition is atomic and totally ordered with respect to other
|
||||
+ operations on the same object. If two wait operations (with different
|
||||
+ ``owner`` identifiers) are queued on the same mutex, only one is
|
||||
+ signaled. If two wait operations are queued on the same semaphore,
|
||||
+ and a value of one is posted to it, only one is signaled.
|
||||
+
|
||||
+ If an abandoned mutex is acquired, the ioctl fails with
|
||||
+ ``EOWNERDEAD``. Although this is a failure return, the function may
|
||||
+ otherwise be considered successful. The mutex is marked as owned by
|
||||
+ the given owner (with a recursion count of 1) and as no longer
|
||||
+ abandoned, and ``index`` is still set to the index of the mutex.
|
||||
+
|
||||
+ The ``alert`` argument is an "extra" event which can terminate the
|
||||
+ wait, independently of all other objects.
|
||||
+
|
||||
+ It is valid to pass the same object more than once, including by
|
||||
+ passing the same event in the ``objs`` array and in ``alert``. If a
|
||||
+ wakeup occurs due to that object being signaled, ``index`` is set to
|
||||
+ the lowest index corresponding to that object.
|
||||
+
|
||||
+ The function may fail with ``EINTR`` if a signal is received.
|
||||
+
|
||||
+.. c:macro:: NTSYNC_IOC_WAIT_ALL
|
||||
+
|
||||
+ Poll on a list of objects, atomically acquiring all of them. Takes a
|
||||
+ pointer to struct :c:type:`ntsync_wait_args`, which is used
|
||||
+ identically to ``NTSYNC_IOC_WAIT_ANY``, except that ``index`` is
|
||||
+ always filled with zero on success if not woken via alert.
|
||||
+
|
||||
+ This function attempts to simultaneously acquire all of the given
|
||||
+ objects. If unable to do so, it sleeps until all objects become
|
||||
+ simultaneously signaled, subsequently acquiring them, or the timeout
|
||||
+ expires. In the latter case the ioctl fails with ``ETIMEDOUT`` and no
|
||||
+ objects are modified.
|
||||
+
|
||||
+ Objects may become signaled and subsequently designaled (through
|
||||
+ acquisition by other threads) while this thread is sleeping. Only
|
||||
+ once all objects are simultaneously signaled does the ioctl acquire
|
||||
+ them and return. The entire acquisition is atomic and totally ordered
|
||||
+ with respect to other operations on any of the given objects.
|
||||
+
|
||||
+ If an abandoned mutex is acquired, the ioctl fails with
|
||||
+ ``EOWNERDEAD``. Similarly to ``NTSYNC_IOC_WAIT_ANY``, all objects are
|
||||
+ nevertheless marked as acquired. Note that if multiple mutex objects
|
||||
+ are specified, there is no way to know which were marked as
|
||||
+ abandoned.
|
||||
+
|
||||
+ As with "any" waits, the ``alert`` argument is an "extra" event which
|
||||
+ can terminate the wait. Critically, however, an "all" wait will
|
||||
+ succeed if all members in ``objs`` are signaled, *or* if ``alert`` is
|
||||
+ signaled. In the latter case ``index`` will be set to ``count``. As
|
||||
+ with "any" waits, if both conditions are filled, the former takes
|
||||
+ priority, and objects in ``objs`` will be acquired.
|
||||
+
|
||||
+ Unlike ``NTSYNC_IOC_WAIT_ANY``, it is not valid to pass the same
|
||||
+ object more than once, nor is it valid to pass the same object in
|
||||
+ ``objs`` and in ``alert``. If this is attempted, the function fails
|
||||
+ with ``EINVAL``.
|
21
debian/patches/misc-ntsync5/0016-Revert-misc-ntsync-mark-driver-as-broken-to-prevent-.patch
vendored
Normal file
21
debian/patches/misc-ntsync5/0016-Revert-misc-ntsync-mark-driver-as-broken-to-prevent-.patch
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
From 06bc88f16094c6f38e0890992af4a32415716c5d Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Thu, 18 Jul 2024 21:19:39 +0200
|
||||
Subject: Revert "misc: ntsync: mark driver as "broken" to prevent from
|
||||
building"
|
||||
|
||||
This reverts commit f5b335dc025cfee90957efa90dc72fada0d5abb4.
|
||||
---
|
||||
drivers/misc/Kconfig | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/drivers/misc/Kconfig
|
||||
+++ b/drivers/misc/Kconfig
|
||||
@@ -507,7 +507,6 @@ config OPEN_DICE
|
||||
|
||||
config NTSYNC
|
||||
tristate "NT synchronization primitive emulation"
|
||||
- depends on BROKEN
|
||||
help
|
||||
This module provides kernel support for emulation of Windows NT
|
||||
synchronization primitives. It is not a hardware driver.
|
32
debian/patches/misc-openwrt/0001-mac80211-ignore-AP-power-level-when-tx-power-type-is.patch
vendored
Normal file
32
debian/patches/misc-openwrt/0001-mac80211-ignore-AP-power-level-when-tx-power-type-is.patch
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
From 1fd57f0a5e998d99035ecb3b110cbdb588403c83 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@openwrt.org>
|
||||
Date: Sat, 5 Dec 2015 15:07:03 +0100
|
||||
Subject: [PATCH] mac80211: ignore AP power level when tx power type is "fixed"
|
||||
|
||||
In some cases a user might want to connect to a far away access point,
|
||||
which announces a low tx power limit. Using the AP's power limit can
|
||||
make the connection significantly more unstable or even impossible, and
|
||||
mac80211 currently provides no way to disable this behavior.
|
||||
|
||||
To fix this, use the currently unused distinction between limited and
|
||||
fixed tx power to decide whether a remote AP's power limit should be
|
||||
accepted.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
net/mac80211/iface.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/net/mac80211/iface.c
|
||||
+++ b/net/mac80211/iface.c
|
||||
@@ -62,7 +62,8 @@ bool __ieee80211_recalc_txpower(struct i
|
||||
if (sdata->deflink.user_power_level != IEEE80211_UNSET_POWER_LEVEL)
|
||||
power = min(power, sdata->deflink.user_power_level);
|
||||
|
||||
- if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
|
||||
+ if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL &&
|
||||
+ sdata->vif.bss_conf.txpower_type != NL80211_TX_POWER_FIXED)
|
||||
power = min(power, sdata->deflink.ap_power_level);
|
||||
|
||||
if (power != sdata->vif.bss_conf.txpower) {
|
24
debian/patches/misc-openwrt/0002-net-enable-fraglist-GRO-by-default.patch
vendored
Normal file
24
debian/patches/misc-openwrt/0002-net-enable-fraglist-GRO-by-default.patch
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Tue, 23 Apr 2024 12:35:21 +0200
|
||||
Subject: [PATCH] net: enable fraglist GRO by default
|
||||
|
||||
This can significantly improve performance for packet forwarding/bridging
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdev_features.h
|
||||
+++ b/include/linux/netdev_features.h
|
||||
@@ -242,10 +242,10 @@ static inline int find_next_netdev_featu
|
||||
#define NETIF_F_UPPER_DISABLES NETIF_F_LRO
|
||||
|
||||
/* changeable features with no special hardware requirements */
|
||||
-#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO)
|
||||
+#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO | NETIF_F_GRO_FRAGLIST)
|
||||
|
||||
/* Changeable features with no special hardware requirements that defaults to off. */
|
||||
-#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)
|
||||
+#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_UDP_FWD)
|
||||
|
||||
#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
|
||||
NETIF_F_HW_VLAN_CTAG_RX | \
|
129
debian/patches/misc-openwrt/0003-net-remove-NETIF_F_GSO_FRAGLIST-from-NETIF_F_GSO_SOF.patch
vendored
Normal file
129
debian/patches/misc-openwrt/0003-net-remove-NETIF_F_GSO_FRAGLIST-from-NETIF_F_GSO_SOF.patch
vendored
Normal file
@ -0,0 +1,129 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 15 Aug 2024 21:15:13 +0200
|
||||
Subject: [PATCH] net: remove NETIF_F_GSO_FRAGLIST from NETIF_F_GSO_SOFTWARE
|
||||
|
||||
Several drivers set NETIF_F_GSO_SOFTWARE, but mangle fraglist GRO packets
|
||||
in a way that they can't be properly segmented anymore.
|
||||
In order to properly deal with this, remove fraglist GSO from
|
||||
NETIF_F_GSO_SOFTWARE and switch to NETIF_F_GSO_SOFTWARE_ALL (which includes
|
||||
fraglist GSO) in places where it's safe to add.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/drivers/net/dummy.c
|
||||
+++ b/drivers/net/dummy.c
|
||||
@@ -110,7 +110,7 @@ static void dummy_setup(struct net_devic
|
||||
dev->flags &= ~IFF_MULTICAST;
|
||||
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
|
||||
dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST;
|
||||
- dev->features |= NETIF_F_GSO_SOFTWARE;
|
||||
+ dev->features |= NETIF_F_GSO_SOFTWARE_ALL;
|
||||
dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
|
||||
dev->features |= NETIF_F_GSO_ENCAP_ALL;
|
||||
dev->hw_features |= dev->features;
|
||||
--- a/drivers/net/loopback.c
|
||||
+++ b/drivers/net/loopback.c
|
||||
@@ -172,7 +172,7 @@ static void gen_lo_setup(struct net_devi
|
||||
dev->flags = IFF_LOOPBACK;
|
||||
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
|
||||
netif_keep_dst(dev);
|
||||
- dev->hw_features = NETIF_F_GSO_SOFTWARE;
|
||||
+ dev->hw_features = NETIF_F_GSO_SOFTWARE_ALL;
|
||||
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
|
||||
| NETIF_F_GSO_SOFTWARE
|
||||
| NETIF_F_HW_CSUM
|
||||
--- a/drivers/net/macvlan.c
|
||||
+++ b/drivers/net/macvlan.c
|
||||
@@ -897,7 +897,7 @@ static int macvlan_hwtstamp_set(struct n
|
||||
static struct lock_class_key macvlan_netdev_addr_lock_key;
|
||||
|
||||
#define ALWAYS_ON_OFFLOADS \
|
||||
- (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
|
||||
+ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE_ALL | \
|
||||
NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
|
||||
|
||||
#define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
|
||||
--- a/include/linux/netdev_features.h
|
||||
+++ b/include/linux/netdev_features.h
|
||||
@@ -219,13 +219,14 @@ static inline int find_next_netdev_featu
|
||||
|
||||
/* List of features with software fallbacks. */
|
||||
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \
|
||||
- NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST)
|
||||
+ NETIF_F_GSO_UDP_L4)
|
||||
+#define NETIF_F_GSO_SOFTWARE_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_FRAGLIST)
|
||||
|
||||
/*
|
||||
* If one device supports one of these features, then enable them
|
||||
* for all in netdev_increment_features.
|
||||
*/
|
||||
-#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
|
||||
+#define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE_ALL | NETIF_F_GSO_ROBUST | \
|
||||
NETIF_F_SG | NETIF_F_HIGHDMA | \
|
||||
NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED)
|
||||
|
||||
--- a/net/8021q/vlan.h
|
||||
+++ b/net/8021q/vlan.h
|
||||
@@ -108,7 +108,7 @@ static inline netdev_features_t vlan_tnl
|
||||
netdev_features_t ret;
|
||||
|
||||
ret = real_dev->hw_enc_features &
|
||||
- (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE |
|
||||
+ (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE_ALL |
|
||||
NETIF_F_GSO_ENCAP_ALL);
|
||||
|
||||
if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
|
||||
--- a/net/8021q/vlan_dev.c
|
||||
+++ b/net/8021q/vlan_dev.c
|
||||
@@ -561,7 +561,7 @@ static int vlan_dev_init(struct net_devi
|
||||
dev->state |= (1 << __LINK_STATE_NOCARRIER);
|
||||
|
||||
dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
|
||||
- NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
|
||||
+ NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE_ALL |
|
||||
NETIF_F_GSO_ENCAP_ALL |
|
||||
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
|
||||
NETIF_F_ALL_FCOE;
|
||||
@@ -654,7 +654,7 @@ static netdev_features_t vlan_dev_fix_fe
|
||||
if (lower_features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
|
||||
lower_features |= NETIF_F_HW_CSUM;
|
||||
features = netdev_intersect_features(features, lower_features);
|
||||
- features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
|
||||
+ features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE_ALL);
|
||||
features |= NETIF_F_LLTX;
|
||||
|
||||
return features;
|
||||
--- a/net/core/sock.c
|
||||
+++ b/net/core/sock.c
|
||||
@@ -2451,7 +2451,7 @@ void sk_setup_caps(struct sock *sk, stru
|
||||
if (sk_is_tcp(sk))
|
||||
sk->sk_route_caps |= NETIF_F_GSO;
|
||||
if (sk->sk_route_caps & NETIF_F_GSO)
|
||||
- sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
|
||||
+ sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE_ALL;
|
||||
if (unlikely(sk->sk_gso_disabled))
|
||||
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
|
||||
if (sk_can_gso(sk)) {
|
||||
--- a/net/mac80211/ieee80211_i.h
|
||||
+++ b/net/mac80211/ieee80211_i.h
|
||||
@@ -2009,7 +2009,7 @@ void ieee80211_color_collision_detection
|
||||
/* interface handling */
|
||||
#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
|
||||
NETIF_F_HW_CSUM | NETIF_F_SG | \
|
||||
- NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE | \
|
||||
+ NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE_ALL | \
|
||||
NETIF_F_HW_TC)
|
||||
#define MAC80211_SUPPORTED_FEATURES_RX (NETIF_F_RXCSUM)
|
||||
#define MAC80211_SUPPORTED_FEATURES (MAC80211_SUPPORTED_FEATURES_TX | \
|
||||
--- a/net/openvswitch/vport-internal_dev.c
|
||||
+++ b/net/openvswitch/vport-internal_dev.c
|
||||
@@ -109,7 +109,7 @@ static void do_setup(struct net_device *
|
||||
|
||||
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
|
||||
NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
|
||||
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
|
||||
+ NETIF_F_GSO_SOFTWARE_ALL | NETIF_F_GSO_ENCAP_ALL;
|
||||
|
||||
netdev->vlan_features = netdev->features;
|
||||
netdev->hw_enc_features = netdev->features;
|
762
debian/patches/mixed-arch/0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch
vendored
Normal file
762
debian/patches/mixed-arch/0001-ZEN-Add-graysky-s-more-ISA-levels-and-uarches.patch
vendored
Normal file
@ -0,0 +1,762 @@
|
||||
From a657df31affbb91d8cb2718e70f42cf8ed6e9a7c Mon Sep 17 00:00:00 2001
|
||||
From: graysky <therealgraysky AT proton DOT me>
|
||||
Date: Mon, 16 Sep 2024 05:55:58 -0400
|
||||
Subject: ZEN: Add graysky's more-ISA-levels-and-uarches
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
From https://github.com/graysky2/kernel_compiler_patch
|
||||
|
||||
more-ISA-levels-and-uarches-for-kernel-6.8-rc4+.patch
|
||||
|
||||
FEATURES
|
||||
This patch adds additional tunings via new x86-64 ISA levels and
|
||||
more micro-architecture options to the Linux kernel in three classes.
|
||||
|
||||
1. New generic x86-64 ISA levels
|
||||
|
||||
These are selectable under:
|
||||
Processor type and features ---> x86-64 compiler ISA level
|
||||
|
||||
• x86-64 A value of (1) is the default
|
||||
• x86-64-v2 A value of (2) brings support for vector
|
||||
instructions up to Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||
and Supplemental Streaming SIMD Extensions 3 (SSSE3), the
|
||||
POPCNT instruction, and CMPXCHG16B.
|
||||
• x86-64-v3 A value of (3) adds vector instructions up to AVX2, MOVBE,
|
||||
and additional bit-manipulation instructions.
|
||||
|
||||
There is also x86-64-v4 but including this makes little sense as
|
||||
the kernel does not use any of the AVX512 instructions anyway.
|
||||
|
||||
Users of glibc 2.33 and above can see which level is supported by running:
|
||||
/lib/ld-linux-x86-64.so.2 --help | grep supported
|
||||
Or
|
||||
/lib64/ld-linux-x86-64.so.2 --help | grep supported
|
||||
|
||||
2. New micro-architectures
|
||||
|
||||
These are selectable under:
|
||||
Processor type and features ---> Processor family
|
||||
|
||||
• AMD Improved K8-family
|
||||
• AMD K10-family
|
||||
• AMD Family 10h (Barcelona)
|
||||
• AMD Family 14h (Bobcat)
|
||||
• AMD Family 16h (Jaguar)
|
||||
• AMD Family 15h (Bulldozer)
|
||||
• AMD Family 15h (Piledriver)
|
||||
• AMD Family 15h (Steamroller)
|
||||
• AMD Family 15h (Excavator)
|
||||
• AMD Family 17h (Zen)
|
||||
• AMD Family 17h (Zen 2)
|
||||
• AMD Family 19h (Zen 3)**
|
||||
• AMD Family 19h (Zen 4)‡
|
||||
• AMD Family 1Ah (Zen 5)§
|
||||
• Intel Silvermont low-power processors
|
||||
• Intel Goldmont low-power processors (Apollo Lake and Denverton)
|
||||
• Intel Goldmont Plus low-power processors (Gemini Lake)
|
||||
• Intel 1st Gen Core i3/i5/i7 (Nehalem)
|
||||
• Intel 1.5 Gen Core i3/i5/i7 (Westmere)
|
||||
• Intel 2nd Gen Core i3/i5/i7 (Sandybridge)
|
||||
• Intel 3rd Gen Core i3/i5/i7 (Ivybridge)
|
||||
• Intel 4th Gen Core i3/i5/i7 (Haswell)
|
||||
• Intel 5th Gen Core i3/i5/i7 (Broadwell)
|
||||
• Intel 6th Gen Core i3/i5/i7 (Skylake)
|
||||
• Intel 6th Gen Core i7/i9 (Skylake X)
|
||||
• Intel 8th Gen Core i3/i5/i7 (Cannon Lake)
|
||||
• Intel 10th Gen Core i7/i9 (Ice Lake)
|
||||
• Intel Xeon (Cascade Lake)
|
||||
• Intel Xeon (Cooper Lake)*
|
||||
• Intel 3rd Gen 10nm++ i3/i5/i7/i9-family (Tiger Lake)*
|
||||
• Intel 4th Gen 10nm++ Xeon (Sapphire Rapids)†
|
||||
• Intel 11th Gen i3/i5/i7/i9-family (Rocket Lake)†
|
||||
• Intel 12th Gen i3/i5/i7/i9-family (Alder Lake)†
|
||||
• Intel 13th Gen i3/i5/i7/i9-family (Raptor Lake)‡
|
||||
• Intel 14th Gen i3/i5/i7/i9-family (Meteor Lake)‡
|
||||
• Intel 5th Gen 10nm++ Xeon (Emerald Rapids)‡
|
||||
|
||||
Notes: If not otherwise noted, gcc >=9.1 is required for support.
|
||||
*Requires gcc >=10.1 or clang >=10.0
|
||||
**Requires gcc >=10.3 or clang >=12.0
|
||||
†Requires gcc >=11.1 or clang >=12.0
|
||||
‡Requires gcc >=13.0 or clang >=15.0.5
|
||||
§Requires gcc >14.0 or clang >=19.0?
|
||||
|
||||
3. Auto-detected micro-architecture levels
|
||||
|
||||
Compile by passing the '-march=native' option which, "selects the CPU
|
||||
to generate code for at compilation time by determining the processor type of
|
||||
the compiling machine. Using -march=native enables all instruction subsets
|
||||
supported by the local machine and will produce code optimized for the local
|
||||
machine under the constraints of the selected instruction set."[1]
|
||||
|
||||
Users of Intel CPUs should select the 'Intel-Native' option and users of AMD
|
||||
CPUs should select the 'AMD-Native' option.
|
||||
|
||||
MINOR NOTES RELATING TO INTEL ATOM PROCESSORS
|
||||
This patch also changes -march=atom to -march=bonnell in accordance with the
|
||||
gcc v4.9 changes. Upstream is using the deprecated -march=atom flag when I
|
||||
believe it should use the newer -march=bonnell flag for atom processors.[2]
|
||||
|
||||
It is not recommended to compile on Atom-CPUs with the 'native' option.[3] The
|
||||
recommendation is to use the 'atom' option instead.
|
||||
|
||||
BENEFITS
|
||||
Small but real speed increases are measurable using a make endpoint comparing
|
||||
a generic kernel to one built with one of the respective microarchs.
|
||||
|
||||
See the following experimental evidence supporting this statement:
|
||||
https://github.com/graysky2/kernel_compiler_patch?tab=readme-ov-file#benchmarks
|
||||
|
||||
REQUIREMENTS
|
||||
linux version 6.8-rc3+
|
||||
gcc version >=9.0 or clang version >=9.0
|
||||
|
||||
ACKNOWLEDGMENTS
|
||||
This patch builds on the seminal work by Jeroen.[4]
|
||||
|
||||
REFERENCES
|
||||
1. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-x86-Options
|
||||
2. https://bugzilla.kernel.org/show_bug.cgi?id=77461
|
||||
3. https://github.com/graysky2/kernel_gcc_patch/issues/15
|
||||
4. http://www.linuxforge.net/docs/linux/linux-gcc.php
|
||||
---
|
||||
arch/x86/Kconfig.cpu | 363 ++++++++++++++++++++++++++++++--
|
||||
arch/x86/Makefile | 89 +++++++-
|
||||
arch/x86/include/asm/vermagic.h | 70 ++++++
|
||||
3 files changed, 506 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/arch/x86/Kconfig.cpu
|
||||
+++ b/arch/x86/Kconfig.cpu
|
||||
@@ -155,9 +155,8 @@ config MPENTIUM4
|
||||
-Paxville
|
||||
-Dempsey
|
||||
|
||||
-
|
||||
config MK6
|
||||
- bool "K6/K6-II/K6-III"
|
||||
+ bool "AMD K6/K6-II/K6-III"
|
||||
depends on X86_32
|
||||
help
|
||||
Select this for an AMD K6-family processor. Enables use of
|
||||
@@ -165,7 +164,7 @@ config MK6
|
||||
flags to GCC.
|
||||
|
||||
config MK7
|
||||
- bool "Athlon/Duron/K7"
|
||||
+ bool "AMD Athlon/Duron/K7"
|
||||
depends on X86_32
|
||||
help
|
||||
Select this for an AMD Athlon K7-family processor. Enables use of
|
||||
@@ -173,12 +172,114 @@ config MK7
|
||||
flags to GCC.
|
||||
|
||||
config MK8
|
||||
- bool "Opteron/Athlon64/Hammer/K8"
|
||||
+ bool "AMD Opteron/Athlon64/Hammer/K8"
|
||||
help
|
||||
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
|
||||
Enables use of some extended instructions, and passes appropriate
|
||||
optimization flags to GCC.
|
||||
|
||||
+config MK8SSE3
|
||||
+ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
|
||||
+ help
|
||||
+ Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
|
||||
+ Enables use of some extended instructions, and passes appropriate
|
||||
+ optimization flags to GCC.
|
||||
+
|
||||
+config MK10
|
||||
+ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
|
||||
+ help
|
||||
+ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
|
||||
+ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
|
||||
+ Enables use of some extended instructions, and passes appropriate
|
||||
+ optimization flags to GCC.
|
||||
+
|
||||
+config MBARCELONA
|
||||
+ bool "AMD Barcelona"
|
||||
+ help
|
||||
+ Select this for AMD Family 10h Barcelona processors.
|
||||
+
|
||||
+ Enables -march=barcelona
|
||||
+
|
||||
+config MBOBCAT
|
||||
+ bool "AMD Bobcat"
|
||||
+ help
|
||||
+ Select this for AMD Family 14h Bobcat processors.
|
||||
+
|
||||
+ Enables -march=btver1
|
||||
+
|
||||
+config MJAGUAR
|
||||
+ bool "AMD Jaguar"
|
||||
+ help
|
||||
+ Select this for AMD Family 16h Jaguar processors.
|
||||
+
|
||||
+ Enables -march=btver2
|
||||
+
|
||||
+config MBULLDOZER
|
||||
+ bool "AMD Bulldozer"
|
||||
+ help
|
||||
+ Select this for AMD Family 15h Bulldozer processors.
|
||||
+
|
||||
+ Enables -march=bdver1
|
||||
+
|
||||
+config MPILEDRIVER
|
||||
+ bool "AMD Piledriver"
|
||||
+ help
|
||||
+ Select this for AMD Family 15h Piledriver processors.
|
||||
+
|
||||
+ Enables -march=bdver2
|
||||
+
|
||||
+config MSTEAMROLLER
|
||||
+ bool "AMD Steamroller"
|
||||
+ help
|
||||
+ Select this for AMD Family 15h Steamroller processors.
|
||||
+
|
||||
+ Enables -march=bdver3
|
||||
+
|
||||
+config MEXCAVATOR
|
||||
+ bool "AMD Excavator"
|
||||
+ help
|
||||
+ Select this for AMD Family 15h Excavator processors.
|
||||
+
|
||||
+ Enables -march=bdver4
|
||||
+
|
||||
+config MZEN
|
||||
+ bool "AMD Zen"
|
||||
+ help
|
||||
+ Select this for AMD Family 17h Zen processors.
|
||||
+
|
||||
+ Enables -march=znver1
|
||||
+
|
||||
+config MZEN2
|
||||
+ bool "AMD Zen 2"
|
||||
+ help
|
||||
+ Select this for AMD Family 17h Zen 2 processors.
|
||||
+
|
||||
+ Enables -march=znver2
|
||||
+
|
||||
+config MZEN3
|
||||
+ bool "AMD Zen 3"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
+ help
|
||||
+ Select this for AMD Family 19h Zen 3 processors.
|
||||
+
|
||||
+ Enables -march=znver3
|
||||
+
|
||||
+config MZEN4
|
||||
+ bool "AMD Zen 4"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000)
|
||||
+ help
|
||||
+ Select this for AMD Family 19h Zen 4 processors.
|
||||
+
|
||||
+ Enables -march=znver4
|
||||
+
|
||||
+config MZEN5
|
||||
+ bool "AMD Zen 5"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 191000)
|
||||
+ help
|
||||
+ Select this for AMD Family 1Ah Zen 5 processors.
|
||||
+
|
||||
+ Enables -march=znver5
|
||||
+
|
||||
config MCRUSOE
|
||||
bool "Crusoe"
|
||||
depends on X86_32
|
||||
@@ -269,8 +370,17 @@ config MPSC
|
||||
using the cpu family field
|
||||
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
|
||||
|
||||
+config MATOM
|
||||
+ bool "Intel Atom"
|
||||
+ help
|
||||
+
|
||||
+ Select this for the Intel Atom platform. Intel Atom CPUs have an
|
||||
+ in-order pipelining architecture and thus can benefit from
|
||||
+ accordingly optimized code. Use a recent GCC with specific Atom
|
||||
+ support in order to fully benefit from selecting this option.
|
||||
+
|
||||
config MCORE2
|
||||
- bool "Core 2/newer Xeon"
|
||||
+ bool "Intel Core 2"
|
||||
help
|
||||
|
||||
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
|
||||
@@ -278,14 +388,191 @@ config MCORE2
|
||||
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
|
||||
(not a typo)
|
||||
|
||||
-config MATOM
|
||||
- bool "Intel Atom"
|
||||
+ Enables -march=core2
|
||||
+
|
||||
+config MNEHALEM
|
||||
+ bool "Intel Nehalem"
|
||||
help
|
||||
|
||||
- Select this for the Intel Atom platform. Intel Atom CPUs have an
|
||||
- in-order pipelining architecture and thus can benefit from
|
||||
- accordingly optimized code. Use a recent GCC with specific Atom
|
||||
- support in order to fully benefit from selecting this option.
|
||||
+ Select this for 1st Gen Core processors in the Nehalem family.
|
||||
+
|
||||
+ Enables -march=nehalem
|
||||
+
|
||||
+config MWESTMERE
|
||||
+ bool "Intel Westmere"
|
||||
+ help
|
||||
+
|
||||
+ Select this for the Intel Westmere formerly Nehalem-C family.
|
||||
+
|
||||
+ Enables -march=westmere
|
||||
+
|
||||
+config MSILVERMONT
|
||||
+ bool "Intel Silvermont"
|
||||
+ help
|
||||
+
|
||||
+ Select this for the Intel Silvermont platform.
|
||||
+
|
||||
+ Enables -march=silvermont
|
||||
+
|
||||
+config MGOLDMONT
|
||||
+ bool "Intel Goldmont"
|
||||
+ help
|
||||
+
|
||||
+ Select this for the Intel Goldmont platform including Apollo Lake and Denverton.
|
||||
+
|
||||
+ Enables -march=goldmont
|
||||
+
|
||||
+config MGOLDMONTPLUS
|
||||
+ bool "Intel Goldmont Plus"
|
||||
+ help
|
||||
+
|
||||
+ Select this for the Intel Goldmont Plus platform including Gemini Lake.
|
||||
+
|
||||
+ Enables -march=goldmont-plus
|
||||
+
|
||||
+config MSANDYBRIDGE
|
||||
+ bool "Intel Sandy Bridge"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 2nd Gen Core processors in the Sandy Bridge family.
|
||||
+
|
||||
+ Enables -march=sandybridge
|
||||
+
|
||||
+config MIVYBRIDGE
|
||||
+ bool "Intel Ivy Bridge"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 3rd Gen Core processors in the Ivy Bridge family.
|
||||
+
|
||||
+ Enables -march=ivybridge
|
||||
+
|
||||
+config MHASWELL
|
||||
+ bool "Intel Haswell"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 4th Gen Core processors in the Haswell family.
|
||||
+
|
||||
+ Enables -march=haswell
|
||||
+
|
||||
+config MBROADWELL
|
||||
+ bool "Intel Broadwell"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 5th Gen Core processors in the Broadwell family.
|
||||
+
|
||||
+ Enables -march=broadwell
|
||||
+
|
||||
+config MSKYLAKE
|
||||
+ bool "Intel Skylake"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 6th Gen Core processors in the Skylake family.
|
||||
+
|
||||
+ Enables -march=skylake
|
||||
+
|
||||
+config MSKYLAKEX
|
||||
+ bool "Intel Skylake X"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 6th Gen Core processors in the Skylake X family.
|
||||
+
|
||||
+ Enables -march=skylake-avx512
|
||||
+
|
||||
+config MCANNONLAKE
|
||||
+ bool "Intel Cannon Lake"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 8th Gen Core processors in the Cannon Lake family.
|
||||
+
|
||||
+ Enables -march=cannonlake
|
||||
+
|
||||
+config MICELAKE
|
||||
+ bool "Intel Ice Lake"
|
||||
+ help
|
||||
+
|
||||
+ Select this for 10th Gen Core processors in the Ice Lake family.
|
||||
+
|
||||
+ Enables -march=icelake-client
|
||||
+
|
||||
+config MCASCADELAKE
|
||||
+ bool "Intel Cascade Lake"
|
||||
+ help
|
||||
+
|
||||
+ Select this for Xeon processors in the Cascade Lake family.
|
||||
+
|
||||
+ Enables -march=cascadelake
|
||||
+
|
||||
+config MCOOPERLAKE
|
||||
+ bool "Intel Cooper Lake"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
|
||||
+ help
|
||||
+
|
||||
+ Select this for Xeon processors in the Cooper Lake family.
|
||||
+
|
||||
+ Enables -march=cooperlake
|
||||
+
|
||||
+config MTIGERLAKE
|
||||
+ bool "Intel Tiger Lake"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
|
||||
+ help
|
||||
+
|
||||
+ Select this for third-generation 10 nm process processors in the Tiger Lake family.
|
||||
+
|
||||
+ Enables -march=tigerlake
|
||||
+
|
||||
+config MSAPPHIRERAPIDS
|
||||
+ bool "Intel Sapphire Rapids"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
+ help
|
||||
+
|
||||
+ Select this for fourth-generation 10 nm process processors in the Sapphire Rapids family.
|
||||
+
|
||||
+ Enables -march=sapphirerapids
|
||||
+
|
||||
+config MROCKETLAKE
|
||||
+ bool "Intel Rocket Lake"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
+ help
|
||||
+
|
||||
+ Select this for eleventh-generation processors in the Rocket Lake family.
|
||||
+
|
||||
+ Enables -march=rocketlake
|
||||
+
|
||||
+config MALDERLAKE
|
||||
+ bool "Intel Alder Lake"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
+ help
|
||||
+
|
||||
+ Select this for twelfth-generation processors in the Alder Lake family.
|
||||
+
|
||||
+ Enables -march=alderlake
|
||||
+
|
||||
+config MRAPTORLAKE
|
||||
+ bool "Intel Raptor Lake"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
|
||||
+ help
|
||||
+
|
||||
+ Select this for thirteenth-generation processors in the Raptor Lake family.
|
||||
+
|
||||
+ Enables -march=raptorlake
|
||||
+
|
||||
+config MMETEORLAKE
|
||||
+ bool "Intel Meteor Lake"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
|
||||
+ help
|
||||
+
|
||||
+ Select this for fourteenth-generation processors in the Meteor Lake family.
|
||||
+
|
||||
+ Enables -march=meteorlake
|
||||
+
|
||||
+config MEMERALDRAPIDS
|
||||
+ bool "Intel Emerald Rapids"
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
|
||||
+ help
|
||||
+
|
||||
+ Select this for fifth-generation 10 nm process processors in the Emerald Rapids family.
|
||||
+
|
||||
+ Enables -march=emeraldrapids
|
||||
|
||||
config GENERIC_CPU
|
||||
bool "Generic-x86-64"
|
||||
@@ -294,8 +581,32 @@ config GENERIC_CPU
|
||||
Generic x86-64 CPU.
|
||||
Run equally well on all x86-64 CPUs.
|
||||
|
||||
+config MNATIVE_INTEL
|
||||
+ bool "Intel-Native optimizations autodetected by the compiler"
|
||||
+ help
|
||||
+
|
||||
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
|
||||
+ the optimum settings to use based on your processor. Do NOT use this
|
||||
+ for AMD CPUs. Intel Only!
|
||||
+
|
||||
+ Enables -march=native
|
||||
+
|
||||
+config MNATIVE_AMD
|
||||
+ bool "AMD-Native optimizations autodetected by the compiler"
|
||||
+ help
|
||||
+
|
||||
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
|
||||
+ the optimum settings to use based on your processor. Do NOT use this
|
||||
+ for Intel CPUs. AMD Only!
|
||||
+
|
||||
+ Enables -march=native
|
||||
+
|
||||
endchoice
|
||||
|
||||
+config SUPPORT_MARCH_CODEVERS
|
||||
+ bool
|
||||
+ default y if (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
+
|
||||
config X86_GENERIC
|
||||
bool "Generic x86 support"
|
||||
depends on X86_32
|
||||
@@ -308,6 +619,30 @@ config X86_GENERIC
|
||||
This is really intended for distributors who need more
|
||||
generic optimizations.
|
||||
|
||||
+config X86_64_VERSION
|
||||
+ int "x86-64 compiler ISA level"
|
||||
+ range 1 3
|
||||
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
+ depends on X86_64 && GENERIC_CPU
|
||||
+ help
|
||||
+ Specify a specific x86-64 compiler ISA level.
|
||||
+
|
||||
+ There are three x86-64 ISA levels that work on top of
|
||||
+ the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4.
|
||||
+
|
||||
+ x86-64-v2 brings support for vector instructions up to Streaming SIMD
|
||||
+ Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3
|
||||
+ (SSSE3), the POPCNT instruction, and CMPXCHG16B.
|
||||
+
|
||||
+ x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional
|
||||
+ bit-manipulation instructions.
|
||||
+
|
||||
+ x86-64-v4 is not included since the kernel does not use AVX512 instructions
|
||||
+
|
||||
+ You can find the best version for your CPU by running one of the following:
|
||||
+ /lib/ld-linux-x86-64.so.2 --help | grep supported
|
||||
+ /lib64/ld-linux-x86-64.so.2 --help | grep supported
|
||||
+
|
||||
#
|
||||
# Define implied options from the CPU selection here
|
||||
config X86_INTERNODE_CACHE_SHIFT
|
||||
@@ -318,7 +653,7 @@ config X86_INTERNODE_CACHE_SHIFT
|
||||
config X86_L1_CACHE_SHIFT
|
||||
int
|
||||
default "7" if MPENTIUM4 || MPSC
|
||||
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
|
||||
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
|
||||
default "4" if MELAN || M486SX || M486 || MGEODEGX1
|
||||
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
|
||||
|
||||
@@ -336,11 +671,11 @@ config X86_ALIGNMENT_16
|
||||
|
||||
config X86_INTEL_USERCOPY
|
||||
def_bool y
|
||||
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
|
||||
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL
|
||||
|
||||
config X86_USE_PPRO_CHECKSUM
|
||||
def_bool y
|
||||
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
|
||||
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
|
||||
|
||||
#
|
||||
# P6_NOPs are a relatively minor optimization that require a family >=
|
||||
--- a/arch/x86/Makefile
|
||||
+++ b/arch/x86/Makefile
|
||||
@@ -178,14 +178,99 @@ else
|
||||
cflags-$(CONFIG_MPSC) += -march=nocona
|
||||
cflags-$(CONFIG_MCORE2) += -march=core2
|
||||
cflags-$(CONFIG_MATOM) += -march=atom
|
||||
- cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
|
||||
+ ifeq ($(CONFIG_X86_64_VERSION),1)
|
||||
+ cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
|
||||
+ rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
|
||||
+ else
|
||||
+ cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_64_VERSION)
|
||||
+ rustflags-$(CONFIG_GENERIC_CPU) += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
|
||||
+ endif
|
||||
+ cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3
|
||||
+ cflags-$(CONFIG_MK10) += -march=amdfam10
|
||||
+ cflags-$(CONFIG_MBARCELONA) += -march=barcelona
|
||||
+ cflags-$(CONFIG_MBOBCAT) += -march=btver1
|
||||
+ cflags-$(CONFIG_MJAGUAR) += -march=btver2
|
||||
+ cflags-$(CONFIG_MBULLDOZER) += -march=bdver1
|
||||
+ cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm
|
||||
+ cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm
|
||||
+ cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm
|
||||
+ cflags-$(CONFIG_MZEN) += -march=znver1
|
||||
+ cflags-$(CONFIG_MZEN2) += -march=znver2
|
||||
+ cflags-$(CONFIG_MZEN3) += -march=znver3
|
||||
+ cflags-$(CONFIG_MZEN4) += -march=znver4
|
||||
+ cflags-$(CONFIG_MZEN5) += -march=znver5
|
||||
+ cflags-$(CONFIG_MNATIVE_INTEL) += -march=native
|
||||
+ cflags-$(CONFIG_MNATIVE_AMD) += -march=native -mno-tbm
|
||||
+ cflags-$(CONFIG_MATOM) += -march=bonnell
|
||||
+ cflags-$(CONFIG_MCORE2) += -march=core2
|
||||
+ cflags-$(CONFIG_MNEHALEM) += -march=nehalem
|
||||
+ cflags-$(CONFIG_MWESTMERE) += -march=westmere
|
||||
+ cflags-$(CONFIG_MSILVERMONT) += -march=silvermont
|
||||
+ cflags-$(CONFIG_MGOLDMONT) += -march=goldmont
|
||||
+ cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus
|
||||
+ cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge
|
||||
+ cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge
|
||||
+ cflags-$(CONFIG_MHASWELL) += -march=haswell
|
||||
+ cflags-$(CONFIG_MBROADWELL) += -march=broadwell
|
||||
+ cflags-$(CONFIG_MSKYLAKE) += -march=skylake
|
||||
+ cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512
|
||||
+ cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake
|
||||
+ cflags-$(CONFIG_MICELAKE) += -march=icelake-client
|
||||
+ cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake
|
||||
+ cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake
|
||||
+ cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake
|
||||
+ cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids
|
||||
+ cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake
|
||||
+ cflags-$(CONFIG_MALDERLAKE) += -march=alderlake
|
||||
+ cflags-$(CONFIG_MRAPTORLAKE) += -march=raptorlake
|
||||
+ cflags-$(CONFIG_MMETEORLAKE) += -march=meteorlake
|
||||
+ cflags-$(CONFIG_MEMERALDRAPIDS) += -march=emeraldrapids
|
||||
KBUILD_CFLAGS += $(cflags-y)
|
||||
|
||||
rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8
|
||||
rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona
|
||||
rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2
|
||||
rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom
|
||||
- rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
|
||||
+ rustflags-$(CONFIG_MK8SSE3) += -Ctarget-cpu=k8-sse3
|
||||
+ rustflags-$(CONFIG_MK10) += -Ctarget-cpu=amdfam10
|
||||
+ rustflags-$(CONFIG_MBARCELONA) += -Ctarget-cpu=barcelona
|
||||
+ rustflags-$(CONFIG_MBOBCAT) += -Ctarget-cpu=btver1
|
||||
+ rustflags-$(CONFIG_MJAGUAR) += -Ctarget-cpu=btver2
|
||||
+ rustflags-$(CONFIG_MBULLDOZER) += -Ctarget-cpu=bdver1
|
||||
+ rustflags-$(CONFIG_MPILEDRIVER) += -Ctarget-cpu=bdver2
|
||||
+ rustflags-$(CONFIG_MSTEAMROLLER) += -Ctarget-cpu=bdver3
|
||||
+ rustflags-$(CONFIG_MEXCAVATOR) += -Ctarget-cpu=bdver4
|
||||
+ rustflags-$(CONFIG_MZEN) += -Ctarget-cpu=znver1
|
||||
+ rustflags-$(CONFIG_MZEN2) += -Ctarget-cpu=znver2
|
||||
+ rustflags-$(CONFIG_MZEN3) += -Ctarget-cpu=znver3
|
||||
+ rustflags-$(CONFIG_MZEN4) += -Ctarget-cpu=znver4
|
||||
+ rustflags-$(CONFIG_MZEN5) += -Ctarget-cpu=znver5
|
||||
+ rustflags-$(CONFIG_MNATIVE_INTEL) += -Ctarget-cpu=native
|
||||
+ rustflags-$(CONFIG_MNATIVE_AMD) += -Ctarget-cpu=native
|
||||
+ rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=bonnell
|
||||
+ rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2
|
||||
+ rustflags-$(CONFIG_MNEHALEM) += -Ctarget-cpu=nehalem
|
||||
+ rustflags-$(CONFIG_MWESTMERE) += -Ctarget-cpu=westmere
|
||||
+ rustflags-$(CONFIG_MSILVERMONT) += -Ctarget-cpu=silvermont
|
||||
+ rustflags-$(CONFIG_MGOLDMONT) += -Ctarget-cpu=goldmont
|
||||
+ rustflags-$(CONFIG_MGOLDMONTPLUS) += -Ctarget-cpu=goldmont-plus
|
||||
+ rustflags-$(CONFIG_MSANDYBRIDGE) += -Ctarget-cpu=sandybridge
|
||||
+ rustflags-$(CONFIG_MIVYBRIDGE) += -Ctarget-cpu=ivybridge
|
||||
+ rustflags-$(CONFIG_MHASWELL) += -Ctarget-cpu=haswell
|
||||
+ rustflags-$(CONFIG_MBROADWELL) += -Ctarget-cpu=broadwell
|
||||
+ rustflags-$(CONFIG_MSKYLAKE) += -Ctarget-cpu=skylake
|
||||
+ rustflags-$(CONFIG_MSKYLAKEX) += -Ctarget-cpu=skylake-avx512
|
||||
+ rustflags-$(CONFIG_MCANNONLAKE) += -Ctarget-cpu=cannonlake
|
||||
+ rustflags-$(CONFIG_MICELAKE) += -Ctarget-cpu=icelake-client
|
||||
+ rustflags-$(CONFIG_MCASCADELAKE) += -Ctarget-cpu=cascadelake
|
||||
+ rustflags-$(CONFIG_MCOOPERLAKE) += -Ctarget-cpu=cooperlake
|
||||
+ rustflags-$(CONFIG_MTIGERLAKE) += -Ctarget-cpu=tigerlake
|
||||
+ rustflags-$(CONFIG_MSAPPHIRERAPIDS) += -Ctarget-cpu=sapphirerapids
|
||||
+ rustflags-$(CONFIG_MROCKETLAKE) += -Ctarget-cpu=rocketlake
|
||||
+ rustflags-$(CONFIG_MALDERLAKE) += -Ctarget-cpu=alderlake
|
||||
+ rustflags-$(CONFIG_MRAPTORLAKE) += -Ctarget-cpu=raptorlake
|
||||
+ rustflags-$(CONFIG_MMETEORLAKE) += -Ctarget-cpu=meteorlake
|
||||
+ rustflags-$(CONFIG_MEMERALDRAPIDS) += -Ctarget-cpu=emeraldrapids
|
||||
KBUILD_RUSTFLAGS += $(rustflags-y)
|
||||
|
||||
KBUILD_CFLAGS += -mno-red-zone
|
||||
--- a/arch/x86/include/asm/vermagic.h
|
||||
+++ b/arch/x86/include/asm/vermagic.h
|
||||
@@ -17,6 +17,54 @@
|
||||
#define MODULE_PROC_FAMILY "586MMX "
|
||||
#elif defined CONFIG_MCORE2
|
||||
#define MODULE_PROC_FAMILY "CORE2 "
|
||||
+#elif defined CONFIG_MNATIVE_INTEL
|
||||
+#define MODULE_PROC_FAMILY "NATIVE_INTEL "
|
||||
+#elif defined CONFIG_MNATIVE_AMD
|
||||
+#define MODULE_PROC_FAMILY "NATIVE_AMD "
|
||||
+#elif defined CONFIG_MNEHALEM
|
||||
+#define MODULE_PROC_FAMILY "NEHALEM "
|
||||
+#elif defined CONFIG_MWESTMERE
|
||||
+#define MODULE_PROC_FAMILY "WESTMERE "
|
||||
+#elif defined CONFIG_MSILVERMONT
|
||||
+#define MODULE_PROC_FAMILY "SILVERMONT "
|
||||
+#elif defined CONFIG_MGOLDMONT
|
||||
+#define MODULE_PROC_FAMILY "GOLDMONT "
|
||||
+#elif defined CONFIG_MGOLDMONTPLUS
|
||||
+#define MODULE_PROC_FAMILY "GOLDMONTPLUS "
|
||||
+#elif defined CONFIG_MSANDYBRIDGE
|
||||
+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
|
||||
+#elif defined CONFIG_MIVYBRIDGE
|
||||
+#define MODULE_PROC_FAMILY "IVYBRIDGE "
|
||||
+#elif defined CONFIG_MHASWELL
|
||||
+#define MODULE_PROC_FAMILY "HASWELL "
|
||||
+#elif defined CONFIG_MBROADWELL
|
||||
+#define MODULE_PROC_FAMILY "BROADWELL "
|
||||
+#elif defined CONFIG_MSKYLAKE
|
||||
+#define MODULE_PROC_FAMILY "SKYLAKE "
|
||||
+#elif defined CONFIG_MSKYLAKEX
|
||||
+#define MODULE_PROC_FAMILY "SKYLAKEX "
|
||||
+#elif defined CONFIG_MCANNONLAKE
|
||||
+#define MODULE_PROC_FAMILY "CANNONLAKE "
|
||||
+#elif defined CONFIG_MICELAKE
|
||||
+#define MODULE_PROC_FAMILY "ICELAKE "
|
||||
+#elif defined CONFIG_MCASCADELAKE
|
||||
+#define MODULE_PROC_FAMILY "CASCADELAKE "
|
||||
+#elif defined CONFIG_MCOOPERLAKE
|
||||
+#define MODULE_PROC_FAMILY "COOPERLAKE "
|
||||
+#elif defined CONFIG_MTIGERLAKE
|
||||
+#define MODULE_PROC_FAMILY "TIGERLAKE "
|
||||
+#elif defined CONFIG_MSAPPHIRERAPIDS
|
||||
+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS "
|
||||
+#elif defined CONFIG_MROCKETLAKE
|
||||
+#define MODULE_PROC_FAMILY "ROCKETLAKE "
|
||||
+#elif defined CONFIG_MALDERLAKE
|
||||
+#define MODULE_PROC_FAMILY "ALDERLAKE "
|
||||
+#elif defined CONFIG_MRAPTORLAKE
|
||||
+#define MODULE_PROC_FAMILY "RAPTORLAKE "
|
||||
+#elif defined CONFIG_MMETEORLAKE
|
||||
+#define MODULE_PROC_FAMILY "METEORLAKE "
|
||||
+#elif defined CONFIG_MEMERALDRAPIDS
|
||||
+#define MODULE_PROC_FAMILY "EMERALDRAPIDS "
|
||||
#elif defined CONFIG_MATOM
|
||||
#define MODULE_PROC_FAMILY "ATOM "
|
||||
#elif defined CONFIG_M686
|
||||
@@ -35,6 +83,28 @@
|
||||
#define MODULE_PROC_FAMILY "K7 "
|
||||
#elif defined CONFIG_MK8
|
||||
#define MODULE_PROC_FAMILY "K8 "
|
||||
+#elif defined CONFIG_MK8SSE3
|
||||
+#define MODULE_PROC_FAMILY "K8SSE3 "
|
||||
+#elif defined CONFIG_MK10
|
||||
+#define MODULE_PROC_FAMILY "K10 "
|
||||
+#elif defined CONFIG_MBARCELONA
|
||||
+#define MODULE_PROC_FAMILY "BARCELONA "
|
||||
+#elif defined CONFIG_MBOBCAT
|
||||
+#define MODULE_PROC_FAMILY "BOBCAT "
|
||||
+#elif defined CONFIG_MBULLDOZER
|
||||
+#define MODULE_PROC_FAMILY "BULLDOZER "
|
||||
+#elif defined CONFIG_MPILEDRIVER
|
||||
+#define MODULE_PROC_FAMILY "PILEDRIVER "
|
||||
+#elif defined CONFIG_MSTEAMROLLER
|
||||
+#define MODULE_PROC_FAMILY "STEAMROLLER "
|
||||
+#elif defined CONFIG_MJAGUAR
|
||||
+#define MODULE_PROC_FAMILY "JAGUAR "
|
||||
+#elif defined CONFIG_MEXCAVATOR
|
||||
+#define MODULE_PROC_FAMILY "EXCAVATOR "
|
||||
+#elif defined CONFIG_MZEN
|
||||
+#define MODULE_PROC_FAMILY "ZEN "
|
||||
+#elif defined CONFIG_MZEN2
|
||||
+#define MODULE_PROC_FAMILY "ZEN2 "
|
||||
#elif defined CONFIG_MELAN
|
||||
#define MODULE_PROC_FAMILY "ELAN "
|
||||
#elif defined CONFIG_MCRUSOE
|
60
debian/patches/mixed-arch/0002-ZEN-Fixup-graysky-s-more-ISA-levels-and-uarches.patch
vendored
Normal file
60
debian/patches/mixed-arch/0002-ZEN-Fixup-graysky-s-more-ISA-levels-and-uarches.patch
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
From 44295ad130b8735cecb288dd7463a14892803d9b Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Tue, 1 Oct 2024 02:05:12 +0200
|
||||
Subject: ZEN: Fixup graysky's more-ISA-levels-and-uarches
|
||||
|
||||
See: https://github.com/graysky2/kernel_compiler_patch/issues/105
|
||||
---
|
||||
arch/x86/Kconfig.cpu | 4 ----
|
||||
arch/x86/Makefile | 4 ----
|
||||
arch/x86/include/asm/vermagic.h | 6 ++++++
|
||||
3 files changed, 6 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/arch/x86/Kconfig.cpu
|
||||
+++ b/arch/x86/Kconfig.cpu
|
||||
@@ -603,10 +603,6 @@ config MNATIVE_AMD
|
||||
|
||||
endchoice
|
||||
|
||||
-config SUPPORT_MARCH_CODEVERS
|
||||
- bool
|
||||
- default y if (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
|
||||
-
|
||||
config X86_GENERIC
|
||||
bool "Generic x86 support"
|
||||
depends on X86_32
|
||||
--- a/arch/x86/Makefile
|
||||
+++ b/arch/x86/Makefile
|
||||
@@ -176,8 +176,6 @@ else
|
||||
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
|
||||
cflags-$(CONFIG_MK8) += -march=k8
|
||||
cflags-$(CONFIG_MPSC) += -march=nocona
|
||||
- cflags-$(CONFIG_MCORE2) += -march=core2
|
||||
- cflags-$(CONFIG_MATOM) += -march=atom
|
||||
ifeq ($(CONFIG_X86_64_VERSION),1)
|
||||
cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
|
||||
rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
|
||||
@@ -229,8 +227,6 @@ else
|
||||
|
||||
rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8
|
||||
rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona
|
||||
- rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2
|
||||
- rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom
|
||||
rustflags-$(CONFIG_MK8SSE3) += -Ctarget-cpu=k8-sse3
|
||||
rustflags-$(CONFIG_MK10) += -Ctarget-cpu=amdfam10
|
||||
rustflags-$(CONFIG_MBARCELONA) += -Ctarget-cpu=barcelona
|
||||
--- a/arch/x86/include/asm/vermagic.h
|
||||
+++ b/arch/x86/include/asm/vermagic.h
|
||||
@@ -105,6 +105,12 @@
|
||||
#define MODULE_PROC_FAMILY "ZEN "
|
||||
#elif defined CONFIG_MZEN2
|
||||
#define MODULE_PROC_FAMILY "ZEN2 "
|
||||
+#elif defined CONFIG_MZEN3
|
||||
+#define MODULE_PROC_FAMILY "ZEN3 "
|
||||
+#elif defined CONFIG_MZEN4
|
||||
+#define MODULE_PROC_FAMILY "ZEN4 "
|
||||
+#elif defined CONFIG_MZEN5
|
||||
+#define MODULE_PROC_FAMILY "ZEN5 "
|
||||
#elif defined CONFIG_MELAN
|
||||
#define MODULE_PROC_FAMILY "ELAN "
|
||||
#elif defined CONFIG_MCRUSOE
|
40
debian/patches/mixed-arch/0003-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch
vendored
Normal file
40
debian/patches/mixed-arch/0003-ZEN-Restore-CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3.patch
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
From 8dc948926f5b68b16a6a47a8f6e0a2154ac8ef3e Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Sun, 11 Dec 2022 23:51:16 +0100
|
||||
Subject: ZEN: Restore CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3
|
||||
|
||||
This reverts a6036a41bffba3d5007e377483b425d470ad8042 (kbuild: drop
|
||||
support for CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3), removes the
|
||||
dependency on CONFIG_ARC and adds RUSTFLAGS.
|
||||
---
|
||||
Makefile | 3 +++
|
||||
init/Kconfig | 6 ++++++
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -814,6 +814,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe
|
||||
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
KBUILD_CFLAGS += -O2
|
||||
KBUILD_RUSTFLAGS += -Copt-level=2
|
||||
+else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3
|
||||
+KBUILD_CFLAGS += -O3
|
||||
+KBUILD_RUSTFLAGS += -Copt-level=3
|
||||
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
KBUILD_CFLAGS += -Os
|
||||
KBUILD_RUSTFLAGS += -Copt-level=s
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1407,6 +1407,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
with the "-O2" compiler flag for best performance and most
|
||||
helpful compile-time warnings.
|
||||
|
||||
+config CC_OPTIMIZE_FOR_PERFORMANCE_O3
|
||||
+ bool "Optimize more for performance (-O3)"
|
||||
+ help
|
||||
+ Choosing this option will pass "-O3" to your compiler to optimize
|
||||
+ the kernel yet more for performance.
|
||||
+
|
||||
config CC_OPTIMIZE_FOR_SIZE
|
||||
bool "Optimize for size (-Os)"
|
||||
help
|
11
debian/patches/mixed-arch/0004-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch
vendored
Normal file
11
debian/patches/mixed-arch/0004-krd-adjust-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3.patch
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -815,7 +815,7 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
KBUILD_CFLAGS += -O2
|
||||
KBUILD_RUSTFLAGS += -Copt-level=2
|
||||
else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3
|
||||
-KBUILD_CFLAGS += -O3
|
||||
+KBUILD_CFLAGS += -O3 $(call cc-option,-fivopts)
|
||||
KBUILD_RUSTFLAGS += -Copt-level=3
|
||||
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
KBUILD_CFLAGS += -Os
|
25
debian/patches/mixed-arch/0005-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch
vendored
Normal file
25
debian/patches/mixed-arch/0005-XANMOD-x86-build-Prevent-generating-avx2-and-avx512-.patch
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
From 3ebc1fdf3e0ee9bff1efe20eb5791eba5c84a810 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Thu, 3 Aug 2023 13:53:49 +0000
|
||||
Subject: [PATCH 01/19] XANMOD: x86/build: Prevent generating avx2 and avx512
|
||||
floating-point code
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
arch/x86/Makefile | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/arch/x86/Makefile
|
||||
+++ b/arch/x86/Makefile
|
||||
@@ -70,9 +70,9 @@ export BITS
|
||||
#
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
|
||||
#
|
||||
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
|
||||
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f
|
||||
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
|
||||
-KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
|
||||
+KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
|
||||
|
||||
#
|
||||
# CFLAGS for compiling floating point code inside the kernel.
|
11
debian/patches/mixed-arch/0006-krd-adjust-KBUILD_CFLAGS-fno-tree-vectorize.patch
vendored
Normal file
11
debian/patches/mixed-arch/0006-krd-adjust-KBUILD_CFLAGS-fno-tree-vectorize.patch
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
--- a/arch/x86/Makefile
|
||||
+++ b/arch/x86/Makefile
|
||||
@@ -70,7 +70,7 @@ export BITS
|
||||
#
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
|
||||
#
|
||||
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f
|
||||
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f -fno-tree-vectorize
|
||||
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
|
||||
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
|
||||
|
@ -0,0 +1,70 @@
|
||||
From 3427331872c37b2edb42406c65764e1565b0591b Mon Sep 17 00:00:00 2001
|
||||
From: Perry Yuan <perry.yuan@amd.com>
|
||||
Date: Fri, 9 Aug 2024 14:09:05 +0800
|
||||
Subject: cpufreq: amd-pstate: add quirk for Ryzen 3000 series processor
|
||||
|
||||
The Ryzen 3000 series processors have been observed lacking the
|
||||
nominal_freq and lowest_freq parameters in their ACPI tables. This
|
||||
absence causes issues with loading the amd-pstate driver on these
|
||||
systems. Introduce a fix to resolve the dependency issue
|
||||
by adding a quirk specifically for the Ryzen 3000 series.
|
||||
|
||||
Reported-by: David Wang <00107082@163.com>
|
||||
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 30 ++++++++++++++++++++++++++++++
|
||||
1 file changed, 30 insertions(+)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -142,6 +142,11 @@ static struct quirk_entry quirk_amd_7k62
|
||||
.lowest_freq = 550,
|
||||
};
|
||||
|
||||
+static struct quirk_entry quirk_amd_mts = {
|
||||
+ .nominal_freq = 3600,
|
||||
+ .lowest_freq = 550,
|
||||
+};
|
||||
+
|
||||
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
|
||||
{
|
||||
/**
|
||||
@@ -158,6 +163,21 @@ static int __init dmi_matched_7k62_bios_
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int __init dmi_matched_mts_bios_bug(const struct dmi_system_id *dmi)
|
||||
+{
|
||||
+ /**
|
||||
+ * match the broken bios for ryzen 3000 series processor support CPPC V2
|
||||
+ * broken BIOS lack of nominal_freq and lowest_freq capabilities
|
||||
+ * definition in ACPI tables
|
||||
+ */
|
||||
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
|
||||
+ quirks = dmi->driver_data;
|
||||
+ pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
|
||||
{
|
||||
.callback = dmi_matched_7k62_bios_bug,
|
||||
@@ -168,6 +188,16 @@ static const struct dmi_system_id amd_ps
|
||||
},
|
||||
.driver_data = &quirk_amd_7k62,
|
||||
},
|
||||
+ {
|
||||
+ .callback = dmi_matched_mts_bios_bug,
|
||||
+ .ident = "AMD Ryzen 3000",
|
||||
+ .matches = {
|
||||
+ DMI_MATCH(DMI_PRODUCT_NAME, "B450M MORTAR MAX (MS-7B89)"),
|
||||
+ DMI_MATCH(DMI_BIOS_RELEASE, "06/10/2020"),
|
||||
+ DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
|
||||
+ },
|
||||
+ .driver_data = &quirk_amd_mts,
|
||||
+ },
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
|
@ -0,0 +1,88 @@
|
||||
From 44f21855901b1fd618ac16b07dbd14e8fea4ee13 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sat, 31 Aug 2024 21:49:11 -0500
|
||||
Subject: cpufreq/amd-pstate: Export symbols for changing modes
|
||||
|
||||
In order to effectively test all mode switch combinations export
|
||||
everything necessary for amd-pstate-ut to trigger a mode switch.
|
||||
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 23 ++++++++++-------------
|
||||
drivers/cpufreq/amd-pstate.h | 14 ++++++++++++++
|
||||
2 files changed, 24 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -60,18 +60,6 @@
|
||||
#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
|
||||
#define AMD_CPPC_EPP_POWERSAVE 0xFF
|
||||
|
||||
-/*
|
||||
- * enum amd_pstate_mode - driver working mode of amd pstate
|
||||
- */
|
||||
-enum amd_pstate_mode {
|
||||
- AMD_PSTATE_UNDEFINED = 0,
|
||||
- AMD_PSTATE_DISABLE,
|
||||
- AMD_PSTATE_PASSIVE,
|
||||
- AMD_PSTATE_ACTIVE,
|
||||
- AMD_PSTATE_GUIDED,
|
||||
- AMD_PSTATE_MAX,
|
||||
-};
|
||||
-
|
||||
static const char * const amd_pstate_mode_string[] = {
|
||||
[AMD_PSTATE_UNDEFINED] = "undefined",
|
||||
[AMD_PSTATE_DISABLE] = "disable",
|
||||
@@ -81,6 +69,14 @@ static const char * const amd_pstate_mod
|
||||
NULL,
|
||||
};
|
||||
|
||||
+const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
|
||||
+{
|
||||
+ if (mode < 0 || mode >= AMD_PSTATE_MAX)
|
||||
+ return NULL;
|
||||
+ return amd_pstate_mode_string[mode];
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);
|
||||
+
|
||||
struct quirk_entry {
|
||||
u32 nominal_freq;
|
||||
u32 lowest_freq;
|
||||
@@ -1392,7 +1388,7 @@ static ssize_t amd_pstate_show_status(ch
|
||||
return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
|
||||
}
|
||||
|
||||
-static int amd_pstate_update_status(const char *buf, size_t size)
|
||||
+int amd_pstate_update_status(const char *buf, size_t size)
|
||||
{
|
||||
int mode_idx;
|
||||
|
||||
@@ -1409,6 +1405,7 @@ static int amd_pstate_update_status(cons
|
||||
|
||||
return 0;
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(amd_pstate_update_status);
|
||||
|
||||
static ssize_t status_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
--- a/drivers/cpufreq/amd-pstate.h
|
||||
+++ b/drivers/cpufreq/amd-pstate.h
|
||||
@@ -103,4 +103,18 @@ struct amd_cpudata {
|
||||
bool boost_state;
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * enum amd_pstate_mode - driver working mode of amd pstate
|
||||
+ */
|
||||
+enum amd_pstate_mode {
|
||||
+ AMD_PSTATE_UNDEFINED = 0,
|
||||
+ AMD_PSTATE_DISABLE,
|
||||
+ AMD_PSTATE_PASSIVE,
|
||||
+ AMD_PSTATE_ACTIVE,
|
||||
+ AMD_PSTATE_GUIDED,
|
||||
+ AMD_PSTATE_MAX,
|
||||
+};
|
||||
+const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode);
|
||||
+int amd_pstate_update_status(const char *buf, size_t size);
|
||||
+
|
||||
#endif /* _LINUX_AMD_PSTATE_H */
|
@ -0,0 +1,77 @@
|
||||
From aabfc7370a7da9c52be97c79ba70a20201e6864a Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sat, 31 Aug 2024 21:49:12 -0500
|
||||
Subject: cpufreq/amd-pstate-ut: Add test case for mode switches
|
||||
|
||||
There is a state machine in the amd-pstate driver utilized for
|
||||
switches for all modes. To make sure that cleanup and setup works
|
||||
properly for each mode add a unit test case that tries all
|
||||
combinations.
|
||||
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate-ut.c | 41 ++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 40 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate-ut.c
|
||||
+++ b/drivers/cpufreq/amd-pstate-ut.c
|
||||
@@ -54,12 +54,14 @@ static void amd_pstate_ut_acpi_cpc_valid
|
||||
static void amd_pstate_ut_check_enabled(u32 index);
|
||||
static void amd_pstate_ut_check_perf(u32 index);
|
||||
static void amd_pstate_ut_check_freq(u32 index);
|
||||
+static void amd_pstate_ut_check_driver(u32 index);
|
||||
|
||||
static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
|
||||
{"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
|
||||
{"amd_pstate_ut_check_enabled", amd_pstate_ut_check_enabled },
|
||||
{"amd_pstate_ut_check_perf", amd_pstate_ut_check_perf },
|
||||
- {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq }
|
||||
+ {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq },
|
||||
+ {"amd_pstate_ut_check_driver", amd_pstate_ut_check_driver }
|
||||
};
|
||||
|
||||
static bool get_shared_mem(void)
|
||||
@@ -257,6 +259,43 @@ skip_test:
|
||||
cpufreq_cpu_put(policy);
|
||||
}
|
||||
|
||||
+static int amd_pstate_set_mode(enum amd_pstate_mode mode)
|
||||
+{
|
||||
+ const char *mode_str = amd_pstate_get_mode_string(mode);
|
||||
+
|
||||
+ pr_debug("->setting mode to %s\n", mode_str);
|
||||
+
|
||||
+ return amd_pstate_update_status(mode_str, strlen(mode_str));
|
||||
+}
|
||||
+
|
||||
+static void amd_pstate_ut_check_driver(u32 index)
|
||||
+{
|
||||
+ enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
|
||||
+ int ret;
|
||||
+
|
||||
+ for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
|
||||
+ ret = amd_pstate_set_mode(mode1);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+ for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) {
|
||||
+ if (mode1 == mode2)
|
||||
+ continue;
|
||||
+ ret = amd_pstate_set_mode(mode2);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+ }
|
||||
+ }
|
||||
+out:
|
||||
+ if (ret)
|
||||
+ pr_warn("%s: failed to update status for %s->%s: %d\n", __func__,
|
||||
+ amd_pstate_get_mode_string(mode1),
|
||||
+ amd_pstate_get_mode_string(mode2), ret);
|
||||
+
|
||||
+ amd_pstate_ut_cases[index].result = ret ?
|
||||
+ AMD_PSTATE_UT_RESULT_FAIL :
|
||||
+ AMD_PSTATE_UT_RESULT_PASS;
|
||||
+}
|
||||
+
|
||||
static int __init amd_pstate_ut_init(void)
|
||||
{
|
||||
u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases);
|
@ -0,0 +1,60 @@
|
||||
From 24e62fbc101d079d398ac6fc76f458676d3d9491 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sun, 1 Sep 2024 00:00:35 -0500
|
||||
Subject: cpufreq/amd-pstate: Catch failures for amd_pstate_epp_update_limit()
|
||||
|
||||
amd_pstate_set_epp() calls cppc_set_epp_perf() which can fail for
|
||||
a variety of reasons but this is ignored. Change the return flow
|
||||
to allow failures.
|
||||
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 11 +++++++----
|
||||
1 file changed, 7 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1595,7 +1595,7 @@ static void amd_pstate_epp_cpu_exit(stru
|
||||
pr_debug("CPU %d exiting\n", policy->cpu);
|
||||
}
|
||||
|
||||
-static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
|
||||
+static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
|
||||
@@ -1645,7 +1645,7 @@ static void amd_pstate_epp_update_limit(
|
||||
* This return value can only be negative for shared_memory
|
||||
* systems where EPP register read/write not supported.
|
||||
*/
|
||||
- return;
|
||||
+ return epp;
|
||||
}
|
||||
|
||||
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
|
||||
@@ -1658,12 +1658,13 @@ static void amd_pstate_epp_update_limit(
|
||||
}
|
||||
|
||||
WRITE_ONCE(cpudata->cppc_req_cached, value);
|
||||
- amd_pstate_set_epp(cpudata, epp);
|
||||
+ return amd_pstate_set_epp(cpudata, epp);
|
||||
}
|
||||
|
||||
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
+ int ret;
|
||||
|
||||
if (!policy->cpuinfo.max_freq)
|
||||
return -ENODEV;
|
||||
@@ -1673,7 +1674,9 @@ static int amd_pstate_epp_set_policy(str
|
||||
|
||||
cpudata->policy = policy->policy;
|
||||
|
||||
- amd_pstate_epp_update_limit(policy);
|
||||
+ ret = amd_pstate_epp_update_limit(policy);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
|
||||
/*
|
||||
* policy->cur is never updated with the amd_pstate_epp driver, but it
|
@ -0,0 +1,67 @@
|
||||
From 29c0347dd542e091e2f7e5980dd885f918f5f676 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:29:57 -0500
|
||||
Subject: x86/amd: Move amd_get_highest_perf() from amd.c to cppc.c
|
||||
|
||||
To prepare to let amd_get_highest_perf() detect preferred cores
|
||||
it will require CPPC functions. Move amd_get_highest_perf() to
|
||||
cppc.c to prepare for 'preferred core detection' rework.
|
||||
|
||||
No functional changes intended.
|
||||
|
||||
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
---
|
||||
arch/x86/kernel/acpi/cppc.c | 16 ++++++++++++++++
|
||||
arch/x86/kernel/cpu/amd.c | 16 ----------------
|
||||
2 files changed, 16 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -116,3 +116,19 @@ void init_freq_invariance_cppc(void)
|
||||
init_done = true;
|
||||
mutex_unlock(&freq_invariance_lock);
|
||||
}
|
||||
+
|
||||
+u32 amd_get_highest_perf(void)
|
||||
+{
|
||||
+ struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
+
|
||||
+ if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
|
||||
+ (c->x86_model >= 0x70 && c->x86_model < 0x80)))
|
||||
+ return 166;
|
||||
+
|
||||
+ if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
|
||||
+ (c->x86_model >= 0x40 && c->x86_model < 0x70)))
|
||||
+ return 166;
|
||||
+
|
||||
+ return 255;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
|
||||
--- a/arch/x86/kernel/cpu/amd.c
|
||||
+++ b/arch/x86/kernel/cpu/amd.c
|
||||
@@ -1190,22 +1190,6 @@ unsigned long amd_get_dr_addr_mask(unsig
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask);
|
||||
|
||||
-u32 amd_get_highest_perf(void)
|
||||
-{
|
||||
- struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
-
|
||||
- if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
|
||||
- (c->x86_model >= 0x70 && c->x86_model < 0x80)))
|
||||
- return 166;
|
||||
-
|
||||
- if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
|
||||
- (c->x86_model >= 0x40 && c->x86_model < 0x70)))
|
||||
- return 166;
|
||||
-
|
||||
- return 255;
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(amd_get_highest_perf);
|
||||
-
|
||||
static void zenbleed_check_cpu(void *unused)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
|
@ -0,0 +1,95 @@
|
||||
From 072efeb45349edd8ba9def11b6a450eaf56690a8 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:29:58 -0500
|
||||
Subject: ACPI: CPPC: Adjust return code for inline functions in
|
||||
!CONFIG_ACPI_CPPC_LIB
|
||||
|
||||
Checkpatch emits the following warning:
|
||||
```
|
||||
WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP
|
||||
```
|
||||
|
||||
Adjust the code accordingly.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
---
|
||||
include/acpi/cppc_acpi.h | 26 +++++++++++++-------------
|
||||
1 file changed, 13 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/include/acpi/cppc_acpi.h
|
||||
+++ b/include/acpi/cppc_acpi.h
|
||||
@@ -164,31 +164,31 @@ extern int cppc_set_auto_sel(int cpu, bo
|
||||
#else /* !CONFIG_ACPI_CPPC_LIB */
|
||||
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_set_enable(int cpu, bool enable)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline bool cppc_perf_ctrs_in_pcc(void)
|
||||
{
|
||||
@@ -212,27 +212,27 @@ static inline bool cpc_ffh_supported(voi
|
||||
}
|
||||
static inline int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_set_auto_sel(int cpu, bool enable)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps)
|
||||
{
|
||||
- return -ENOTSUPP;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
#endif /* !CONFIG_ACPI_CPPC_LIB */
|
||||
|
@ -0,0 +1,162 @@
|
||||
From 21492d91ffc7c3fdb6507f64a74abf8326c75141 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:29:59 -0500
|
||||
Subject: x86/amd: Rename amd_get_highest_perf() to
|
||||
amd_get_boost_ratio_numerator()
|
||||
|
||||
The function name is ambiguous because it returns an intermediate value
|
||||
for calculating maximum frequency rather than the CPPC 'Highest Perf'
|
||||
register.
|
||||
|
||||
Rename the function to clarify its use and allow the function to return
|
||||
errors. Adjust the consumer in acpi-cpufreq to catch errors.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
---
|
||||
arch/x86/include/asm/processor.h | 3 ---
|
||||
arch/x86/kernel/acpi/cppc.c | 44 +++++++++++++++++++++++---------
|
||||
drivers/cpufreq/acpi-cpufreq.c | 12 ++++++---
|
||||
include/acpi/cppc_acpi.h | 5 ++++
|
||||
4 files changed, 46 insertions(+), 18 deletions(-)
|
||||
|
||||
--- a/arch/x86/include/asm/processor.h
|
||||
+++ b/arch/x86/include/asm/processor.h
|
||||
@@ -691,8 +691,6 @@ static inline u32 per_cpu_l2c_id(unsigne
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
-extern u32 amd_get_highest_perf(void);
|
||||
-
|
||||
/*
|
||||
* Issue a DIV 0/1 insn to clear any division data from previous DIV
|
||||
* operations.
|
||||
@@ -705,7 +703,6 @@ static __always_inline void amd_clear_di
|
||||
|
||||
extern void amd_check_microcode(void);
|
||||
#else
|
||||
-static inline u32 amd_get_highest_perf(void) { return 0; }
|
||||
static inline void amd_clear_divider(void) { }
|
||||
static inline void amd_check_microcode(void) { }
|
||||
#endif
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -69,7 +69,7 @@ int cpc_write_ffh(int cpunum, struct cpc
|
||||
static void amd_set_max_freq_ratio(void)
|
||||
{
|
||||
struct cppc_perf_caps perf_caps;
|
||||
- u64 highest_perf, nominal_perf;
|
||||
+ u64 numerator, nominal_perf;
|
||||
u64 perf_ratio;
|
||||
int rc;
|
||||
|
||||
@@ -79,15 +79,19 @@ static void amd_set_max_freq_ratio(void)
|
||||
return;
|
||||
}
|
||||
|
||||
- highest_perf = amd_get_highest_perf();
|
||||
+ rc = amd_get_boost_ratio_numerator(0, &numerator);
|
||||
+ if (rc) {
|
||||
+ pr_debug("Could not retrieve highest performance (%d)\n", rc);
|
||||
+ return;
|
||||
+ }
|
||||
nominal_perf = perf_caps.nominal_perf;
|
||||
|
||||
- if (!highest_perf || !nominal_perf) {
|
||||
- pr_debug("Could not retrieve highest or nominal performance\n");
|
||||
+ if (!nominal_perf) {
|
||||
+ pr_debug("Could not retrieve nominal performance\n");
|
||||
return;
|
||||
}
|
||||
|
||||
- perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
|
||||
+ perf_ratio = div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf);
|
||||
/* midpoint between max_boost and max_P */
|
||||
perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
|
||||
if (!perf_ratio) {
|
||||
@@ -117,18 +121,34 @@ void init_freq_invariance_cppc(void)
|
||||
mutex_unlock(&freq_invariance_lock);
|
||||
}
|
||||
|
||||
-u32 amd_get_highest_perf(void)
|
||||
+/**
|
||||
+ * amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
|
||||
+ * @cpu: CPU to get numerator for.
|
||||
+ * @numerator: Output variable for numerator.
|
||||
+ *
|
||||
+ * Determine the numerator to use for calculating the boost ratio on
|
||||
+ * a CPU. On systems that support preferred cores, this will be a hardcoded
|
||||
+ * value. On other systems this will the highest performance register value.
|
||||
+ *
|
||||
+ * Return: 0 for success, negative error code otherwise.
|
||||
+ */
|
||||
+int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
|
||||
- (c->x86_model >= 0x70 && c->x86_model < 0x80)))
|
||||
- return 166;
|
||||
+ (c->x86_model >= 0x70 && c->x86_model < 0x80))) {
|
||||
+ *numerator = 166;
|
||||
+ return 0;
|
||||
+ }
|
||||
|
||||
if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
|
||||
- (c->x86_model >= 0x40 && c->x86_model < 0x70)))
|
||||
- return 166;
|
||||
+ (c->x86_model >= 0x40 && c->x86_model < 0x70))) {
|
||||
+ *numerator = 166;
|
||||
+ return 0;
|
||||
+ }
|
||||
+ *numerator = 255;
|
||||
|
||||
- return 255;
|
||||
+ return 0;
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(amd_get_highest_perf);
|
||||
+EXPORT_SYMBOL_GPL(amd_get_boost_ratio_numerator);
|
||||
--- a/drivers/cpufreq/acpi-cpufreq.c
|
||||
+++ b/drivers/cpufreq/acpi-cpufreq.c
|
||||
@@ -642,10 +642,16 @@ static u64 get_max_boost_ratio(unsigned
|
||||
return 0;
|
||||
}
|
||||
|
||||
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
|
||||
- highest_perf = amd_get_highest_perf();
|
||||
- else
|
||||
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
|
||||
+ ret = amd_get_boost_ratio_numerator(cpu, &highest_perf);
|
||||
+ if (ret) {
|
||||
+ pr_debug("CPU%d: Unable to get boost ratio numerator (%d)\n",
|
||||
+ cpu, ret);
|
||||
+ return 0;
|
||||
+ }
|
||||
+ } else {
|
||||
highest_perf = perf_caps.highest_perf;
|
||||
+ }
|
||||
|
||||
nominal_perf = perf_caps.nominal_perf;
|
||||
|
||||
--- a/include/acpi/cppc_acpi.h
|
||||
+++ b/include/acpi/cppc_acpi.h
|
||||
@@ -161,6 +161,7 @@ extern int cppc_get_epp_perf(int cpunum,
|
||||
extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
|
||||
extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps);
|
||||
extern int cppc_set_auto_sel(int cpu, bool enable);
|
||||
+extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
|
||||
#else /* !CONFIG_ACPI_CPPC_LIB */
|
||||
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
|
||||
{
|
||||
@@ -234,6 +235,10 @@ static inline int cppc_get_auto_sel_caps
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
+static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
|
||||
+{
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
#endif /* !CONFIG_ACPI_CPPC_LIB */
|
||||
|
||||
#endif /* _CPPC_ACPI_H*/
|
35
debian/patches/patchset-pf/amd-pstate/0008-ACPI-CPPC-Drop-check-for-non-zero-perf-ratio.patch
vendored
Normal file
35
debian/patches/patchset-pf/amd-pstate/0008-ACPI-CPPC-Drop-check-for-non-zero-perf-ratio.patch
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
From 6f10d066dce0f1781b514a0352f0b427a32b1bb2 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:00 -0500
|
||||
Subject: ACPI: CPPC: Drop check for non zero perf ratio
|
||||
|
||||
perf_ratio is a u64 and SCHED_CAPACITY_SCALE is a large number.
|
||||
Their sum, shifted right by one, can never be zero.
|
||||
|
||||
Drop the check.
|
||||
|
||||
Suggested-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
---
|
||||
arch/x86/kernel/acpi/cppc.c | 7 +------
|
||||
1 file changed, 1 insertion(+), 6 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -91,13 +91,8 @@ static void amd_set_max_freq_ratio(void)
|
||||
return;
|
||||
}
|
||||
|
||||
- perf_ratio = div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf);
|
||||
/* midpoint between max_boost and max_P */
|
||||
- perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
|
||||
- if (!perf_ratio) {
|
||||
- pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
|
||||
- return;
|
||||
- }
|
||||
+ perf_ratio = (div_u64(numerator * SCHED_CAPACITY_SCALE, nominal_perf) + SCHED_CAPACITY_SCALE) >> 1;
|
||||
|
||||
freq_invariance_set_perf_ratio(perf_ratio, false);
|
||||
}
|
@ -0,0 +1,44 @@
|
||||
From 8c142a91a58f24119e99d4e66b11890f4a4ef984 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:01 -0500
|
||||
Subject: ACPI: CPPC: Adjust debug messages in amd_set_max_freq_ratio() to warn
|
||||
|
||||
If the boost ratio isn't calculated properly for the system for any
|
||||
reason this can cause other problems that are non-obvious.
|
||||
|
||||
Raise all messages to warn instead.
|
||||
|
||||
Suggested-by: Perry Yuan <Perry.Yuan@amd.com>
|
||||
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
---
|
||||
arch/x86/kernel/acpi/cppc.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -75,19 +75,19 @@ static void amd_set_max_freq_ratio(void)
|
||||
|
||||
rc = cppc_get_perf_caps(0, &perf_caps);
|
||||
if (rc) {
|
||||
- pr_debug("Could not retrieve perf counters (%d)\n", rc);
|
||||
+ pr_warn("Could not retrieve perf counters (%d)\n", rc);
|
||||
return;
|
||||
}
|
||||
|
||||
rc = amd_get_boost_ratio_numerator(0, &numerator);
|
||||
if (rc) {
|
||||
- pr_debug("Could not retrieve highest performance (%d)\n", rc);
|
||||
+ pr_warn("Could not retrieve highest performance (%d)\n", rc);
|
||||
return;
|
||||
}
|
||||
nominal_perf = perf_caps.nominal_perf;
|
||||
|
||||
if (!nominal_perf) {
|
||||
- pr_debug("Could not retrieve nominal performance\n");
|
||||
+ pr_warn("Could not retrieve nominal performance\n");
|
||||
return;
|
||||
}
|
||||
|
138
debian/patches/patchset-pf/amd-pstate/0010-x86-amd-Move-amd_get_highest_perf-out-of-amd-pstate.patch
vendored
Normal file
138
debian/patches/patchset-pf/amd-pstate/0010-x86-amd-Move-amd_get_highest_perf-out-of-amd-pstate.patch
vendored
Normal file
@ -0,0 +1,138 @@
|
||||
From 952e7bdc4cf67603f230f8eb91818ad4676e5a83 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:02 -0500
|
||||
Subject: x86/amd: Move amd_get_highest_perf() out of amd-pstate
|
||||
|
||||
amd_pstate_get_highest_perf() is a helper used to get the highest perf
|
||||
value on AMD systems. It's used in amd-pstate as part of preferred
|
||||
core handling, but applicable for acpi-cpufreq as well.
|
||||
|
||||
Move it out to cppc handling code as amd_get_highest_perf().
|
||||
|
||||
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
---
|
||||
arch/x86/kernel/acpi/cppc.c | 30 ++++++++++++++++++++++++++++++
|
||||
drivers/cpufreq/amd-pstate.c | 34 ++--------------------------------
|
||||
include/acpi/cppc_acpi.h | 5 +++++
|
||||
3 files changed, 37 insertions(+), 32 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -116,6 +116,36 @@ void init_freq_invariance_cppc(void)
|
||||
mutex_unlock(&freq_invariance_lock);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Get the highest performance register value.
|
||||
+ * @cpu: CPU from which to get highest performance.
|
||||
+ * @highest_perf: Return address for highest performance value.
|
||||
+ *
|
||||
+ * Return: 0 for success, negative error code otherwise.
|
||||
+ */
|
||||
+int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
|
||||
+{
|
||||
+ u64 val;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &val);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+
|
||||
+ val = AMD_CPPC_HIGHEST_PERF(val);
|
||||
+ } else {
|
||||
+ ret = cppc_get_highest_perf(cpu, &val);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ WRITE_ONCE(*highest_perf, (u32)val);
|
||||
+out:
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
|
||||
+
|
||||
/**
|
||||
* amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
|
||||
* @cpu: CPU to get numerator for.
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -837,36 +837,6 @@ static void amd_pstste_sched_prefcore_wo
|
||||
}
|
||||
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
|
||||
|
||||
-/*
|
||||
- * Get the highest performance register value.
|
||||
- * @cpu: CPU from which to get highest performance.
|
||||
- * @highest_perf: Return address.
|
||||
- *
|
||||
- * Return: 0 for success, -EIO otherwise.
|
||||
- */
|
||||
-static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
|
||||
-{
|
||||
- int ret;
|
||||
-
|
||||
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
- u64 cap1;
|
||||
-
|
||||
- ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
|
||||
- if (ret)
|
||||
- return ret;
|
||||
- WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
||||
- } else {
|
||||
- u64 cppc_highest_perf;
|
||||
-
|
||||
- ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
|
||||
- if (ret)
|
||||
- return ret;
|
||||
- WRITE_ONCE(*highest_perf, cppc_highest_perf);
|
||||
- }
|
||||
-
|
||||
- return (ret);
|
||||
-}
|
||||
-
|
||||
#define CPPC_MAX_PERF U8_MAX
|
||||
|
||||
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
|
||||
@@ -874,7 +844,7 @@ static void amd_pstate_init_prefcore(str
|
||||
int ret, prio;
|
||||
u32 highest_perf;
|
||||
|
||||
- ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
|
||||
+ ret = amd_get_highest_perf(cpudata->cpu, &highest_perf);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
@@ -918,7 +888,7 @@ static void amd_pstate_update_limits(uns
|
||||
if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
|
||||
goto free_cpufreq_put;
|
||||
|
||||
- ret = amd_pstate_get_highest_perf(cpu, &cur_high);
|
||||
+ ret = amd_get_highest_perf(cpu, &cur_high);
|
||||
if (ret)
|
||||
goto free_cpufreq_put;
|
||||
|
||||
--- a/include/acpi/cppc_acpi.h
|
||||
+++ b/include/acpi/cppc_acpi.h
|
||||
@@ -161,6 +161,7 @@ extern int cppc_get_epp_perf(int cpunum,
|
||||
extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
|
||||
extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps);
|
||||
extern int cppc_set_auto_sel(int cpu, bool enable);
|
||||
+extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf);
|
||||
extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
|
||||
#else /* !CONFIG_ACPI_CPPC_LIB */
|
||||
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
|
||||
@@ -235,6 +236,10 @@ static inline int cppc_get_auto_sel_caps
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
+static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf)
|
||||
+{
|
||||
+ return -ENODEV;
|
||||
+}
|
||||
static inline int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
@ -0,0 +1,251 @@
|
||||
From 3ab7da5bbf2087982dbfe2b0f2937d0dddc3afb1 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:03 -0500
|
||||
Subject: x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()
|
||||
|
||||
AMD systems that support preferred cores will use "166" as their
|
||||
numerator for max frequency calculations instead of "255".
|
||||
|
||||
Add a function for detecting preferred cores by looking at the
|
||||
highest perf value on all cores.
|
||||
|
||||
If preferred cores are enabled return 166 and if disabled the
|
||||
value in the highest perf register. As the function will be called
|
||||
multiple times, cache the boost numerator and whether preferred
|
||||
cores were detected in global variables.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
arch/x86/kernel/acpi/cppc.c | 93 ++++++++++++++++++++++++++++++++----
|
||||
drivers/cpufreq/amd-pstate.c | 34 +++++--------
|
||||
include/acpi/cppc_acpi.h | 5 ++
|
||||
3 files changed, 101 insertions(+), 31 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -9,6 +9,16 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/topology.h>
|
||||
|
||||
+#define CPPC_HIGHEST_PERF_PREFCORE 166
|
||||
+
|
||||
+enum amd_pref_core {
|
||||
+ AMD_PREF_CORE_UNKNOWN = 0,
|
||||
+ AMD_PREF_CORE_SUPPORTED,
|
||||
+ AMD_PREF_CORE_UNSUPPORTED,
|
||||
+};
|
||||
+static enum amd_pref_core amd_pref_core_detected;
|
||||
+static u64 boost_numerator;
|
||||
+
|
||||
/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
|
||||
|
||||
bool cpc_supported_by_cpu(void)
|
||||
@@ -147,6 +157,66 @@ out:
|
||||
EXPORT_SYMBOL_GPL(amd_get_highest_perf);
|
||||
|
||||
/**
|
||||
+ * amd_detect_prefcore: Detect if CPUs in the system support preferred cores
|
||||
+ * @detected: Output variable for the result of the detection.
|
||||
+ *
|
||||
+ * Determine whether CPUs in the system support preferred cores. On systems
|
||||
+ * that support preferred cores, different highest perf values will be found
|
||||
+ * on different cores. On other systems, the highest perf value will be the
|
||||
+ * same on all cores.
|
||||
+ *
|
||||
+ * The result of the detection will be stored in the 'detected' parameter.
|
||||
+ *
|
||||
+ * Return: 0 for success, negative error code otherwise
|
||||
+ */
|
||||
+int amd_detect_prefcore(bool *detected)
|
||||
+{
|
||||
+ int cpu, count = 0;
|
||||
+ u64 highest_perf[2] = {0};
|
||||
+
|
||||
+ if (WARN_ON(!detected))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ switch (amd_pref_core_detected) {
|
||||
+ case AMD_PREF_CORE_SUPPORTED:
|
||||
+ *detected = true;
|
||||
+ return 0;
|
||||
+ case AMD_PREF_CORE_UNSUPPORTED:
|
||||
+ *detected = false;
|
||||
+ return 0;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ for_each_present_cpu(cpu) {
|
||||
+ u32 tmp;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = amd_get_highest_perf(cpu, &tmp);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ if (!count || (count == 1 && tmp != highest_perf[0]))
|
||||
+ highest_perf[count++] = tmp;
|
||||
+
|
||||
+ if (count == 2)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ *detected = (count == 2);
|
||||
+ boost_numerator = highest_perf[0];
|
||||
+
|
||||
+ amd_pref_core_detected = *detected ? AMD_PREF_CORE_SUPPORTED :
|
||||
+ AMD_PREF_CORE_UNSUPPORTED;
|
||||
+
|
||||
+ pr_debug("AMD CPPC preferred core is %ssupported (highest perf: 0x%llx)\n",
|
||||
+ *detected ? "" : "un", highest_perf[0]);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(amd_detect_prefcore);
|
||||
+
|
||||
+/**
|
||||
* amd_get_boost_ratio_numerator: Get the numerator to use for boost ratio calculation
|
||||
* @cpu: CPU to get numerator for.
|
||||
* @numerator: Output variable for numerator.
|
||||
@@ -155,24 +225,27 @@ EXPORT_SYMBOL_GPL(amd_get_highest_perf);
|
||||
* a CPU. On systems that support preferred cores, this will be a hardcoded
|
||||
* value. On other systems this will the highest performance register value.
|
||||
*
|
||||
+ * If booting the system with amd-pstate enabled but preferred cores disabled then
|
||||
+ * the correct boost numerator will be returned to match hardware capabilities
|
||||
+ * even if the preferred cores scheduling hints are not enabled.
|
||||
+ *
|
||||
* Return: 0 for success, negative error code otherwise.
|
||||
*/
|
||||
int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
|
||||
{
|
||||
- struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
-
|
||||
- if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
|
||||
- (c->x86_model >= 0x70 && c->x86_model < 0x80))) {
|
||||
- *numerator = 166;
|
||||
- return 0;
|
||||
- }
|
||||
+ bool prefcore;
|
||||
+ int ret;
|
||||
|
||||
- if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
|
||||
- (c->x86_model >= 0x40 && c->x86_model < 0x70))) {
|
||||
- *numerator = 166;
|
||||
+ ret = amd_detect_prefcore(&prefcore);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* without preferred cores, return the highest perf register value */
|
||||
+ if (!prefcore) {
|
||||
+ *numerator = boost_numerator;
|
||||
return 0;
|
||||
}
|
||||
- *numerator = 255;
|
||||
+ *numerator = CPPC_HIGHEST_PERF_PREFCORE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -841,32 +841,18 @@ static DECLARE_WORK(sched_prefcore_work,
|
||||
|
||||
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
|
||||
{
|
||||
- int ret, prio;
|
||||
- u32 highest_perf;
|
||||
-
|
||||
- ret = amd_get_highest_perf(cpudata->cpu, &highest_perf);
|
||||
- if (ret)
|
||||
+ /* user disabled or not detected */
|
||||
+ if (!amd_pstate_prefcore)
|
||||
return;
|
||||
|
||||
cpudata->hw_prefcore = true;
|
||||
- /* check if CPPC preferred core feature is enabled*/
|
||||
- if (highest_perf < CPPC_MAX_PERF)
|
||||
- prio = (int)highest_perf;
|
||||
- else {
|
||||
- pr_debug("AMD CPPC preferred core is unsupported!\n");
|
||||
- cpudata->hw_prefcore = false;
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (!amd_pstate_prefcore)
|
||||
- return;
|
||||
|
||||
/*
|
||||
* The priorities can be set regardless of whether or not
|
||||
* sched_set_itmt_support(true) has been called and it is valid to
|
||||
* update them at any time after it has been called.
|
||||
*/
|
||||
- sched_set_itmt_core_prio(prio, cpudata->cpu);
|
||||
+ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);
|
||||
|
||||
schedule_work(&sched_prefcore_work);
|
||||
}
|
||||
@@ -1037,12 +1023,12 @@ static int amd_pstate_cpu_init(struct cp
|
||||
|
||||
cpudata->cpu = policy->cpu;
|
||||
|
||||
- amd_pstate_init_prefcore(cpudata);
|
||||
-
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
|
||||
+ amd_pstate_init_prefcore(cpudata);
|
||||
+
|
||||
ret = amd_pstate_init_freq(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
@@ -1493,12 +1479,12 @@ static int amd_pstate_epp_cpu_init(struc
|
||||
cpudata->cpu = policy->cpu;
|
||||
cpudata->epp_policy = 0;
|
||||
|
||||
- amd_pstate_init_prefcore(cpudata);
|
||||
-
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
|
||||
+ amd_pstate_init_prefcore(cpudata);
|
||||
+
|
||||
ret = amd_pstate_init_freq(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
@@ -1960,6 +1946,12 @@ static int __init amd_pstate_init(void)
|
||||
static_call_update(amd_pstate_update_perf, cppc_update_perf);
|
||||
}
|
||||
|
||||
+ if (amd_pstate_prefcore) {
|
||||
+ ret = amd_detect_prefcore(&amd_pstate_prefcore);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
/* enable amd pstate feature */
|
||||
ret = amd_pstate_enable(true);
|
||||
if (ret) {
|
||||
--- a/include/acpi/cppc_acpi.h
|
||||
+++ b/include/acpi/cppc_acpi.h
|
||||
@@ -163,6 +163,7 @@ extern int cppc_get_auto_sel_caps(int cp
|
||||
extern int cppc_set_auto_sel(int cpu, bool enable);
|
||||
extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf);
|
||||
extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator);
|
||||
+extern int amd_detect_prefcore(bool *detected);
|
||||
#else /* !CONFIG_ACPI_CPPC_LIB */
|
||||
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
|
||||
{
|
||||
@@ -244,6 +245,10 @@ static inline int amd_get_boost_ratio_nu
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
+static inline int amd_detect_prefcore(bool *detected)
|
||||
+{
|
||||
+ return -ENODEV;
|
||||
+}
|
||||
#endif /* !CONFIG_ACPI_CPPC_LIB */
|
||||
|
||||
#endif /* _CPPC_ACPI_H*/
|
@ -0,0 +1,169 @@
|
||||
From 68d89574b86625f4bd7a784fe9bcc221dc290e4f Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:04 -0500
|
||||
Subject: cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into
|
||||
amd_get_boost_ratio_numerator()
|
||||
|
||||
The special case in amd_pstate_highest_perf_set() is the value used
|
||||
for calculating the boost numerator. Merge this into
|
||||
amd_get_boost_ratio_numerator() and then use that to calculate boost
|
||||
ratio.
|
||||
|
||||
This allows dropping more special casing of the highest perf value.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.sheoy@amd.com>
|
||||
---
|
||||
Documentation/admin-guide/pm/amd-pstate.rst | 3 +-
|
||||
arch/x86/kernel/acpi/cppc.c | 16 +++++++
|
||||
drivers/cpufreq/amd-pstate.c | 52 ++++-----------------
|
||||
3 files changed, 28 insertions(+), 43 deletions(-)
|
||||
|
||||
--- a/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
@@ -251,7 +251,8 @@ performance supported in `AMD CPPC Perfo
|
||||
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
-``cpuinfo``.
|
||||
+``cpuinfo``. On systems that support preferred core, the driver will have
|
||||
+different values for some cores than others.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
--- a/arch/x86/kernel/acpi/cppc.c
|
||||
+++ b/arch/x86/kernel/acpi/cppc.c
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/topology.h>
|
||||
|
||||
+#define CPPC_HIGHEST_PERF_PERFORMANCE 196
|
||||
#define CPPC_HIGHEST_PERF_PREFCORE 166
|
||||
|
||||
enum amd_pref_core {
|
||||
@@ -245,6 +246,21 @@ int amd_get_boost_ratio_numerator(unsign
|
||||
*numerator = boost_numerator;
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+ /*
|
||||
+ * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
|
||||
+ * the highest performance level is set to 196.
|
||||
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218759
|
||||
+ */
|
||||
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4)) {
|
||||
+ switch (boot_cpu_data.x86_model) {
|
||||
+ case 0x70 ... 0x7f:
|
||||
+ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE;
|
||||
+ return 0;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
*numerator = CPPC_HIGHEST_PERF_PREFCORE;
|
||||
|
||||
return 0;
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -52,8 +52,6 @@
|
||||
#define AMD_PSTATE_TRANSITION_LATENCY 20000
|
||||
#define AMD_PSTATE_TRANSITION_DELAY 1000
|
||||
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
|
||||
-#define CPPC_HIGHEST_PERF_PERFORMANCE 196
|
||||
-#define CPPC_HIGHEST_PERF_DEFAULT 166
|
||||
|
||||
#define AMD_CPPC_EPP_PERFORMANCE 0x00
|
||||
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
|
||||
@@ -398,43 +396,17 @@ static inline int amd_pstate_enable(bool
|
||||
return static_call(amd_pstate_enable)(enable);
|
||||
}
|
||||
|
||||
-static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
|
||||
-{
|
||||
- struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
-
|
||||
- /*
|
||||
- * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
|
||||
- * the highest performance level is set to 196.
|
||||
- * https://bugzilla.kernel.org/show_bug.cgi?id=218759
|
||||
- */
|
||||
- if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
|
||||
- return CPPC_HIGHEST_PERF_PERFORMANCE;
|
||||
-
|
||||
- return CPPC_HIGHEST_PERF_DEFAULT;
|
||||
-}
|
||||
-
|
||||
static int pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
u64 cap1;
|
||||
- u32 highest_perf;
|
||||
|
||||
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
|
||||
&cap1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- /* For platforms that do not support the preferred core feature, the
|
||||
- * highest_pef may be configured with 166 or 255, to avoid max frequency
|
||||
- * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
|
||||
- * the default max perf.
|
||||
- */
|
||||
- if (cpudata->hw_prefcore)
|
||||
- highest_perf = amd_pstate_highest_perf_set(cpudata);
|
||||
- else
|
||||
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
|
||||
-
|
||||
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
|
||||
- WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
|
||||
+ WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
||||
+ WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
|
||||
@@ -446,19 +418,13 @@ static int pstate_init_perf(struct amd_c
|
||||
static int cppc_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
- u32 highest_perf;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- if (cpudata->hw_prefcore)
|
||||
- highest_perf = amd_pstate_highest_perf_set(cpudata);
|
||||
- else
|
||||
- highest_perf = cppc_perf.highest_perf;
|
||||
-
|
||||
- WRITE_ONCE(cpudata->highest_perf, highest_perf);
|
||||
- WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
|
||||
+ WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
|
||||
+ WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf);
|
||||
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
|
||||
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
|
||||
cppc_perf.lowest_nonlinear_perf);
|
||||
@@ -944,8 +910,8 @@ static u32 amd_pstate_get_transition_lat
|
||||
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
|
||||
{
|
||||
int ret;
|
||||
- u32 min_freq;
|
||||
- u32 highest_perf, max_freq;
|
||||
+ u32 min_freq, max_freq;
|
||||
+ u64 numerator;
|
||||
u32 nominal_perf, nominal_freq;
|
||||
u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
|
||||
u32 boost_ratio, lowest_nonlinear_ratio;
|
||||
@@ -967,8 +933,10 @@ static int amd_pstate_init_freq(struct a
|
||||
|
||||
nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
|
||||
- highest_perf = READ_ONCE(cpudata->highest_perf);
|
||||
- boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
|
||||
+ ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf);
|
||||
max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
|
||||
|
||||
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
|
@ -0,0 +1,42 @@
|
||||
From deed718125e73b6bf280dcebb80c39108226388c Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:05 -0500
|
||||
Subject: cpufreq: amd-pstate: Optimize amd_pstate_update_limits()
|
||||
|
||||
Don't take and release the mutex when prefcore isn't present and
|
||||
avoid initialization of variables that will be initially set
|
||||
in the function.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 10 +++++-----
|
||||
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -836,17 +836,17 @@ static void amd_pstate_update_limits(uns
|
||||
|
||||
cpudata = policy->driver_data;
|
||||
|
||||
- mutex_lock(&amd_pstate_driver_lock);
|
||||
- if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
|
||||
- goto free_cpufreq_put;
|
||||
+ if (!amd_pstate_prefcore)
|
||||
+ return;
|
||||
|
||||
+ mutex_lock(&amd_pstate_driver_lock);
|
||||
ret = amd_get_highest_perf(cpu, &cur_high);
|
||||
if (ret)
|
||||
goto free_cpufreq_put;
|
||||
|
||||
prev_high = READ_ONCE(cpudata->prefcore_ranking);
|
||||
- if (prev_high != cur_high) {
|
||||
- highest_perf_changed = true;
|
||||
+ highest_perf_changed = (prev_high != cur_high);
|
||||
+ if (highest_perf_changed) {
|
||||
WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
|
||||
|
||||
if (cur_high < CPPC_MAX_PERF)
|
@ -0,0 +1,29 @@
|
||||
From 391075a34e392c7cacd338a6b034a21a10679855 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:06 -0500
|
||||
Subject: cpufreq: amd-pstate: Add documentation for `amd_pstate_hw_prefcore`
|
||||
|
||||
Explain that the sysfs file represents both preferred core being
|
||||
enabled by the user and supported by the hardware.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
---
|
||||
Documentation/admin-guide/pm/amd-pstate.rst | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
--- a/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
@@ -263,6 +263,11 @@ lowest non-linear performance in `AMD CP
|
||||
<perf_cap_>`_.)
|
||||
This attribute is read-only.
|
||||
|
||||
+``amd_pstate_hw_prefcore``
|
||||
+
|
||||
+Whether the platform supports the preferred core feature and it has been
|
||||
+enabled. This attribute is read-only.
|
||||
+
|
||||
``energy_performance_available_preferences``
|
||||
|
||||
A list of all the supported EPP preferences that could be used for
|
@ -0,0 +1,42 @@
|
||||
From 2ed9874f6dcafcc2bee7a922af9e1d1c62dbeb18 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:30:07 -0500
|
||||
Subject: amd-pstate: Add missing documentation for
|
||||
`amd_pstate_prefcore_ranking`
|
||||
|
||||
`amd_pstate_prefcore_ranking` reflects the dynamic rankings of a CPU
|
||||
core based on platform conditions. Explicitly include it in the
|
||||
documentation.
|
||||
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
---
|
||||
Documentation/admin-guide/pm/amd-pstate.rst | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
|
||||
@@ -252,7 +252,8 @@ In some ASICs, the highest CPPC performa
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``. On systems that support preferred core, the driver will have
|
||||
-different values for some cores than others.
|
||||
+different values for some cores than others and this will reflect the values
|
||||
+advertised by the platform at bootup.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
@@ -268,6 +269,12 @@ This attribute is read-only.
|
||||
Whether the platform supports the preferred core feature and it has been
|
||||
enabled. This attribute is read-only.
|
||||
|
||||
+``amd_pstate_prefcore_ranking``
|
||||
+
|
||||
+The performance ranking of the core. This number doesn't have any unit, but
|
||||
+larger numbers are preferred at the time of reading. This can change at
|
||||
+runtime based on platform conditions. This attribute is read-only.
|
||||
+
|
||||
``energy_performance_available_preferences``
|
||||
|
||||
A list of all the supported EPP preferences that could be used for
|
24
debian/patches/patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-Fix-non-kerneldoc-comment.patch
vendored
Normal file
24
debian/patches/patchset-pf/amd-pstate/0016-cpufreq-amd-pstate-Fix-non-kerneldoc-comment.patch
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
From 2e2ba39aec71fb51e897c3275b255ef806800cf0 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 5 Sep 2024 11:23:51 -0500
|
||||
Subject: cpufreq/amd-pstate: Fix non kerneldoc comment
|
||||
|
||||
The comment for amd_cppc_supported() isn't meant to be kernel doc.
|
||||
|
||||
Fixes: cb817ec6673b7 ("cpufreq: amd-pstate: show CPPC debug message if CPPC is not supported")
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1786,7 +1786,7 @@ static int __init amd_pstate_set_driver(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
-/**
|
||||
+/*
|
||||
* CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
|
||||
* show the debug message that helps to check if the CPU has CPPC support for loading issue.
|
||||
*/
|
@ -0,0 +1,24 @@
|
||||
From 185e64a7e1a749593f3d6dadc666da9dda82d48c Mon Sep 17 00:00:00 2001
|
||||
From: Qianqiang Liu <qianqiang.liu@163.com>
|
||||
Date: Wed, 11 Sep 2024 07:39:24 +0800
|
||||
Subject: cpufreq/amd-pstate-ut: Fix an "Uninitialized variables" issue
|
||||
|
||||
Using uninitialized value "mode2" when calling "amd_pstate_get_mode_string".
|
||||
Set "mode2" to "AMD_PSTATE_DISABLE" by default.
|
||||
|
||||
Signed-off-by: Qianqiang Liu <qianqiang.liu@163.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate-ut.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate-ut.c
|
||||
+++ b/drivers/cpufreq/amd-pstate-ut.c
|
||||
@@ -270,7 +270,7 @@ static int amd_pstate_set_mode(enum amd_
|
||||
|
||||
static void amd_pstate_ut_check_driver(u32 index)
|
||||
{
|
||||
- enum amd_pstate_mode mode1, mode2;
|
||||
+ enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
|
||||
int ret;
|
||||
|
||||
for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
|
@ -0,0 +1,108 @@
|
||||
From d74ce254cc470da670d6b90c69bab553cdbde62b Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Tue, 17 Sep 2024 09:14:35 +0000
|
||||
Subject: cpufreq/amd-pstate: Rename MSR and shared memory specific functions
|
||||
|
||||
Existing function names "cppc_*" and "pstate_*" for shared memory and
|
||||
MSR based systems are not intuitive enough, replace them with "shmem_*" and
|
||||
"msr_*" respectively.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 24 ++++++++++++------------
|
||||
1 file changed, 12 insertions(+), 12 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -263,7 +263,7 @@ static int amd_pstate_get_energy_pref_in
|
||||
return index;
|
||||
}
|
||||
|
||||
-static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
|
||||
+static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
|
||||
u32 des_perf, u32 max_perf, bool fast_switch)
|
||||
{
|
||||
if (fast_switch)
|
||||
@@ -273,7 +273,7 @@ static void pstate_update_perf(struct am
|
||||
READ_ONCE(cpudata->cppc_req_cached));
|
||||
}
|
||||
|
||||
-DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
|
||||
+DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
|
||||
|
||||
static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
|
||||
u32 min_perf, u32 des_perf,
|
||||
@@ -336,7 +336,7 @@ static int amd_pstate_set_energy_pref_in
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static inline int pstate_enable(bool enable)
|
||||
+static inline int msr_enable(bool enable)
|
||||
{
|
||||
int ret, cpu;
|
||||
unsigned long logical_proc_id_mask = 0;
|
||||
@@ -362,7 +362,7 @@ static inline int pstate_enable(bool ena
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int cppc_enable(bool enable)
|
||||
+static int shmem_enable(bool enable)
|
||||
{
|
||||
int cpu, ret = 0;
|
||||
struct cppc_perf_ctrls perf_ctrls;
|
||||
@@ -389,14 +389,14 @@ static int cppc_enable(bool enable)
|
||||
return ret;
|
||||
}
|
||||
|
||||
-DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
|
||||
+DEFINE_STATIC_CALL(amd_pstate_enable, msr_enable);
|
||||
|
||||
static inline int amd_pstate_enable(bool enable)
|
||||
{
|
||||
return static_call(amd_pstate_enable)(enable);
|
||||
}
|
||||
|
||||
-static int pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
+static int msr_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
u64 cap1;
|
||||
|
||||
@@ -415,7 +415,7 @@ static int pstate_init_perf(struct amd_c
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int cppc_init_perf(struct amd_cpudata *cpudata)
|
||||
+static int shmem_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
|
||||
@@ -450,14 +450,14 @@ static int cppc_init_perf(struct amd_cpu
|
||||
return ret;
|
||||
}
|
||||
|
||||
-DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
|
||||
+DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);
|
||||
|
||||
static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
return static_call(amd_pstate_init_perf)(cpudata);
|
||||
}
|
||||
|
||||
-static void cppc_update_perf(struct amd_cpudata *cpudata,
|
||||
+static void shmem_update_perf(struct amd_cpudata *cpudata,
|
||||
u32 min_perf, u32 des_perf,
|
||||
u32 max_perf, bool fast_switch)
|
||||
{
|
||||
@@ -1909,9 +1909,9 @@ static int __init amd_pstate_init(void)
|
||||
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
|
||||
} else {
|
||||
pr_debug("AMD CPPC shared memory based functionality is supported\n");
|
||||
- static_call_update(amd_pstate_enable, cppc_enable);
|
||||
- static_call_update(amd_pstate_init_perf, cppc_init_perf);
|
||||
- static_call_update(amd_pstate_update_perf, cppc_update_perf);
|
||||
+ static_call_update(amd_pstate_enable, shmem_enable);
|
||||
+ static_call_update(amd_pstate_init_perf, shmem_init_perf);
|
||||
+ static_call_update(amd_pstate_update_perf, shmem_update_perf);
|
||||
}
|
||||
|
||||
if (amd_pstate_prefcore) {
|
@ -0,0 +1,115 @@
|
||||
From 787175146e26a199c06be4e6bf8cf8da0f757271 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 3 Oct 2024 08:39:52 +0000
|
||||
Subject: cpufreq: Add a callback to update the min_freq_req from drivers
|
||||
|
||||
Currently, there is no proper way to update the initial lower frequency
|
||||
limit from cpufreq drivers. The only way is to add a new min_freq qos
|
||||
request from the driver side, but it leads to the issue explained below.
|
||||
|
||||
The QoS infrastructure collates the constraints from multiple
|
||||
subsystems and saves them in a plist. The "current value" is defined to
|
||||
be the highest value in the plist for min_freq constraint.
|
||||
|
||||
The cpufreq core adds a qos_request for min_freq to be 0 and the amd-pstate
|
||||
driver today adds qos request for min_freq to be lowest_freq, where
|
||||
lowest_freq corresponds to CPPC.lowest_perf.
|
||||
|
||||
For example, with the amd-pstate driver, suppose lowest_freq is 400000 KHz and
|
||||
lowest_non_linear_freq is 1200000 KHz.
|
||||
|
||||
At this point of time, the min_freq QoS plist looks like:
|
||||
|
||||
head--> 400000 KHz (registered by amd-pstate) --> 0 KHz (registered by
|
||||
cpufreq core)
|
||||
|
||||
When a user updates /sys/devices/system/cpu/cpuX/cpufreq/scaling_min_freq,
|
||||
it only results in updating the cpufreq-core's node in the plist, where
|
||||
say 0 becomes the newly echoed value.
|
||||
|
||||
Now, if the user echoes a value 1000000 KHz, to scaling_min_freq, then the
|
||||
new list would be
|
||||
|
||||
head--> 1000000 KHz (registered by cpufreq core) --> 400000 KHz (registered
|
||||
by amd-pstate)
|
||||
|
||||
and the new "current value" of the min_freq QoS constraint will be 1000000
|
||||
KHz, this is the scenario where it works as expected.
|
||||
|
||||
Suppose we change the amd-pstate driver code's min_freq qos constraint
|
||||
to lowest_non_linear_freq instead of lowest_freq, then the user will
|
||||
never be able to request a value below that, due to the following:
|
||||
|
||||
At boot time, the min_freq QoS plist would be
|
||||
|
||||
head--> 1200000 KHz (registered by amd-pstate) --> 0 KHz (registered by
|
||||
cpufreq core)
|
||||
|
||||
When the user echoes a value of 1000000 KHz, to
|
||||
/sys/devices/..../scaling_min_freq, then the new list would be
|
||||
|
||||
head--> 1200000 KHz (registered by amd-pstate) --> 1000000 KHz (registered
|
||||
by cpufreq core)
|
||||
|
||||
with the new "current value" of the min_freq QoS remaining 1200000 KHz.
|
||||
Since the current value has not changed, there won't be any notifications
|
||||
sent to the subsystems which have added their QoS constraints. In
|
||||
particular, the amd-pstate driver will not get the notification, and thus,
|
||||
the user's request to lower the scaling_min_freq will be ineffective.
|
||||
|
||||
Hence, it is advisable to have a single source of truth for the min and
|
||||
max freq QoS constraints between the cpufreq and the cpufreq drivers.
|
||||
|
||||
So add a new callback get_init_min_freq() in struct cpufreq_driver,
|
||||
which allows amd-pstate (or any other cpufreq driver) to override the
|
||||
default min_freq value being set in the policy->min_freq_req. Now
|
||||
scaling_min_freq can be modified by the user to any value (lower or
|
||||
higher than the init value) later on if desired.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/cpufreq.c | 6 +++++-
|
||||
include/linux/cpufreq.h | 6 ++++++
|
||||
2 files changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/cpufreq.c
|
||||
+++ b/drivers/cpufreq/cpufreq.c
|
||||
@@ -1380,6 +1380,7 @@ static int cpufreq_online(unsigned int c
|
||||
bool new_policy;
|
||||
unsigned long flags;
|
||||
unsigned int j;
|
||||
+ u32 init_min_freq = FREQ_QOS_MIN_DEFAULT_VALUE;
|
||||
int ret;
|
||||
|
||||
pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
|
||||
@@ -1464,9 +1465,12 @@ static int cpufreq_online(unsigned int c
|
||||
goto out_destroy_policy;
|
||||
}
|
||||
|
||||
+ if (cpufreq_driver->get_init_min_freq)
|
||||
+ init_min_freq = cpufreq_driver->get_init_min_freq(policy);
|
||||
+
|
||||
ret = freq_qos_add_request(&policy->constraints,
|
||||
policy->min_freq_req, FREQ_QOS_MIN,
|
||||
- FREQ_QOS_MIN_DEFAULT_VALUE);
|
||||
+ init_min_freq);
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* So we don't call freq_qos_remove_request() for an
|
||||
--- a/include/linux/cpufreq.h
|
||||
+++ b/include/linux/cpufreq.h
|
||||
@@ -414,6 +414,12 @@ struct cpufreq_driver {
|
||||
* policy is properly initialized, but before the governor is started.
|
||||
*/
|
||||
void (*register_em)(struct cpufreq_policy *policy);
|
||||
+
|
||||
+ /*
|
||||
+ * Set by drivers that want to initialize the policy->min_freq_req with
|
||||
+ * a value different from the default value (0) in cpufreq core.
|
||||
+ */
|
||||
+ int (*get_init_min_freq)(struct cpufreq_policy *policy);
|
||||
};
|
||||
|
||||
/* flags */
|
@ -0,0 +1,79 @@
|
||||
From f5b234be445a45b0bcacc37e0aad7a6bc7900eac Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 3 Oct 2024 08:39:54 +0000
|
||||
Subject: cpufreq/amd-pstate: Set the initial min_freq to lowest_nonlinear_freq
|
||||
|
||||
According to the AMD architectural programmer's manual volume 2 [1], in
|
||||
section "17.6.4.1 CPPC_CAPABILITY_1" lowest_nonlinear_perf is described
|
||||
as "Reports the most energy efficient performance level (in terms of
|
||||
performance per watt). Above this threshold, lower performance levels
|
||||
generally result in increased energy efficiency. Reducing performance
|
||||
below this threshold does not result in total energy savings for a given
|
||||
computation, although it reduces instantaneous power consumption". So
|
||||
lowest_nonlinear_perf is the most power efficient performance level, and
|
||||
going below that would lead to a worse performance/watt.
|
||||
|
||||
Also, setting the minimum frequency to lowest_nonlinear_freq (instead of
|
||||
lowest_freq) allows the CPU to idle at a higher frequency which leads
|
||||
to more time being spent in a deeper idle state (as trivial idle tasks
|
||||
are completed sooner). This has shown a power benefit in some systems,
|
||||
in other systems, power consumption has increased but so has the
|
||||
throughput/watt.
|
||||
|
||||
Use the get_init_min_freq() callback to set the initial lower limit for
|
||||
amd-pstate driver to lowest_nonlinear_freq instead of lowest_freq.
|
||||
|
||||
Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf [1]
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 16 +++++++++-------
|
||||
1 file changed, 9 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1025,13 +1025,6 @@ static int amd_pstate_cpu_init(struct cp
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC))
|
||||
policy->fast_switch_possible = true;
|
||||
|
||||
- ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
|
||||
- FREQ_QOS_MIN, policy->cpuinfo.min_freq);
|
||||
- if (ret < 0) {
|
||||
- dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
|
||||
- goto free_cpudata1;
|
||||
- }
|
||||
-
|
||||
ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
|
||||
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
|
||||
if (ret < 0) {
|
||||
@@ -1736,6 +1729,13 @@ static int amd_pstate_epp_resume(struct
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int amd_pstate_get_init_min_freq(struct cpufreq_policy *policy)
|
||||
+{
|
||||
+ struct amd_cpudata *cpudata = policy->driver_data;
|
||||
+
|
||||
+ return READ_ONCE(cpudata->lowest_nonlinear_freq);
|
||||
+}
|
||||
+
|
||||
static struct cpufreq_driver amd_pstate_driver = {
|
||||
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
|
||||
.verify = amd_pstate_verify,
|
||||
@@ -1749,6 +1749,7 @@ static struct cpufreq_driver amd_pstate_
|
||||
.update_limits = amd_pstate_update_limits,
|
||||
.name = "amd-pstate",
|
||||
.attr = amd_pstate_attr,
|
||||
+ .get_init_min_freq = amd_pstate_get_init_min_freq,
|
||||
};
|
||||
|
||||
static struct cpufreq_driver amd_pstate_epp_driver = {
|
||||
@@ -1765,6 +1766,7 @@ static struct cpufreq_driver amd_pstate_
|
||||
.set_boost = amd_pstate_set_boost,
|
||||
.name = "amd-pstate-epp",
|
||||
.attr = amd_pstate_epp_attr,
|
||||
+ .get_init_min_freq = amd_pstate_get_init_min_freq,
|
||||
};
|
||||
|
||||
static int __init amd_pstate_set_driver(int mode_idx)
|
@ -0,0 +1,103 @@
|
||||
From f7b2b3a1c0d015c4272793bed89734c5cffb354c Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 3 Oct 2024 08:39:56 +0000
|
||||
Subject: cpufreq/amd-pstate: Cleanup the old min_freq qos request remnants
|
||||
|
||||
Convert the freq_qos_request array in struct amd_cpudata to a single
|
||||
variable (only for max_freq request). Remove the references to cpudata->req
|
||||
array. Remove and rename the jump labels accordingly.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 19 ++++++++-----------
|
||||
drivers/cpufreq/amd-pstate.h | 4 ++--
|
||||
2 files changed, 10 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -726,7 +726,7 @@ static int amd_pstate_cpu_boost_update(s
|
||||
policy->max = policy->cpuinfo.max_freq;
|
||||
|
||||
if (cppc_state == AMD_PSTATE_PASSIVE) {
|
||||
- ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
|
||||
+ ret = freq_qos_update_request(&cpudata->max_freq_req, policy->cpuinfo.max_freq);
|
||||
if (ret < 0)
|
||||
pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
|
||||
}
|
||||
@@ -993,17 +993,17 @@ static int amd_pstate_cpu_init(struct cp
|
||||
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
- goto free_cpudata1;
|
||||
+ goto free_cpudata;
|
||||
|
||||
amd_pstate_init_prefcore(cpudata);
|
||||
|
||||
ret = amd_pstate_init_freq(cpudata);
|
||||
if (ret)
|
||||
- goto free_cpudata1;
|
||||
+ goto free_cpudata;
|
||||
|
||||
ret = amd_pstate_init_boost_support(cpudata);
|
||||
if (ret)
|
||||
- goto free_cpudata1;
|
||||
+ goto free_cpudata;
|
||||
|
||||
min_freq = READ_ONCE(cpudata->min_freq);
|
||||
max_freq = READ_ONCE(cpudata->max_freq);
|
||||
@@ -1025,11 +1025,11 @@ static int amd_pstate_cpu_init(struct cp
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC))
|
||||
policy->fast_switch_possible = true;
|
||||
|
||||
- ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
|
||||
+ ret = freq_qos_add_request(&policy->constraints, &cpudata->max_freq_req,
|
||||
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
|
||||
- goto free_cpudata2;
|
||||
+ goto free_cpudata;
|
||||
}
|
||||
|
||||
cpudata->max_limit_freq = max_freq;
|
||||
@@ -1042,9 +1042,7 @@ static int amd_pstate_cpu_init(struct cp
|
||||
|
||||
return 0;
|
||||
|
||||
-free_cpudata2:
|
||||
- freq_qos_remove_request(&cpudata->req[0]);
|
||||
-free_cpudata1:
|
||||
+free_cpudata:
|
||||
kfree(cpudata);
|
||||
return ret;
|
||||
}
|
||||
@@ -1053,8 +1051,7 @@ static void amd_pstate_cpu_exit(struct c
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
|
||||
- freq_qos_remove_request(&cpudata->req[1]);
|
||||
- freq_qos_remove_request(&cpudata->req[0]);
|
||||
+ freq_qos_remove_request(&cpudata->max_freq_req);
|
||||
policy->fast_switch_possible = false;
|
||||
kfree(cpudata);
|
||||
}
|
||||
--- a/drivers/cpufreq/amd-pstate.h
|
||||
+++ b/drivers/cpufreq/amd-pstate.h
|
||||
@@ -28,7 +28,7 @@ struct amd_aperf_mperf {
|
||||
/**
|
||||
* struct amd_cpudata - private CPU data for AMD P-State
|
||||
* @cpu: CPU number
|
||||
- * @req: constraint request to apply
|
||||
+ * @max_freq_req: maximum frequency constraint request to apply
|
||||
* @cppc_req_cached: cached performance request hints
|
||||
* @highest_perf: the maximum performance an individual processor may reach,
|
||||
* assuming ideal conditions
|
||||
@@ -68,7 +68,7 @@ struct amd_aperf_mperf {
|
||||
struct amd_cpudata {
|
||||
int cpu;
|
||||
|
||||
- struct freq_qos_request req[2];
|
||||
+ struct freq_qos_request max_freq_req;
|
||||
u64 cppc_req_cached;
|
||||
|
||||
u32 highest_perf;
|
@ -0,0 +1,42 @@
|
||||
From d1216c052bedbf6d79e4b0261e2f09e17c66ffd3 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 4 Oct 2024 12:23:04 +0000
|
||||
Subject: cpufreq/amd-pstate: Fix amd_pstate mode switch on shared memory
|
||||
systems
|
||||
|
||||
While switching the driver mode between active and passive, Collaborative
|
||||
Processor Performance Control (CPPC) is disabled in
|
||||
amd_pstate_unregister_driver(). But, it is not enabled back while registering
|
||||
the new driver (passive or active). This leads to the new driver mode not
|
||||
working correctly, so enable it back in amd_pstate_register_driver().
|
||||
|
||||
Fixes: 3ca7bc818d8c ("cpufreq: amd-pstate: Add guided mode control support via sysfs")
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1221,11 +1221,21 @@ static int amd_pstate_register_driver(in
|
||||
return -EINVAL;
|
||||
|
||||
cppc_state = mode;
|
||||
+
|
||||
+ ret = amd_pstate_enable(true);
|
||||
+ if (ret) {
|
||||
+ pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
|
||||
+ ret);
|
||||
+ amd_pstate_driver_cleanup();
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
ret = cpufreq_register_driver(current_pstate_driver);
|
||||
if (ret) {
|
||||
amd_pstate_driver_cleanup();
|
||||
return ret;
|
||||
}
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
@ -0,0 +1,57 @@
|
||||
From c4fde0d177bdb33912f450914d84d6432391a8b5 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sat, 12 Oct 2024 12:45:16 -0500
|
||||
Subject: cpufreq/amd-pstate: Use nominal perf for limits when boost is
|
||||
disabled
|
||||
|
||||
When boost has been disabled the limit for perf should be nominal perf not
|
||||
the highest perf. Using the latter to do calculations will lead to
|
||||
incorrect values that are still above nominal.
|
||||
|
||||
Fixes: ad4caad58d91 ("cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator()")
|
||||
Reported-by: Peter Jung <ptr1337@cachyos.org>
|
||||
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219348
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 20 ++++++++++++++------
|
||||
1 file changed, 14 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -566,11 +566,16 @@ static int amd_pstate_verify(struct cpuf
|
||||
|
||||
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
|
||||
{
|
||||
- u32 max_limit_perf, min_limit_perf, lowest_perf;
|
||||
+ u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf;
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
|
||||
- max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
|
||||
- min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
|
||||
+ if (cpudata->boost_supported && !policy->boost_enabled)
|
||||
+ max_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
+ else
|
||||
+ max_perf = READ_ONCE(cpudata->highest_perf);
|
||||
+
|
||||
+ max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
|
||||
+ min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
|
||||
|
||||
lowest_perf = READ_ONCE(cpudata->lowest_perf);
|
||||
if (min_limit_perf < lowest_perf)
|
||||
@@ -1526,10 +1531,13 @@ static int amd_pstate_epp_update_limit(s
|
||||
u64 value;
|
||||
s16 epp;
|
||||
|
||||
- max_perf = READ_ONCE(cpudata->highest_perf);
|
||||
+ if (cpudata->boost_supported && !policy->boost_enabled)
|
||||
+ max_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
+ else
|
||||
+ max_perf = READ_ONCE(cpudata->highest_perf);
|
||||
min_perf = READ_ONCE(cpudata->lowest_perf);
|
||||
- max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
|
||||
- min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
|
||||
+ max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
|
||||
+ min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
|
||||
|
||||
if (min_limit_perf < min_perf)
|
||||
min_limit_perf = min_perf;
|
@ -0,0 +1,55 @@
|
||||
From 01ad0fb3da95867947d923596a26b18d844afe3c Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sat, 12 Oct 2024 12:45:17 -0500
|
||||
Subject: cpufreq/amd-pstate: Don't update CPPC request in
|
||||
amd_pstate_cpu_boost_update()
|
||||
|
||||
When boost is changed the CPPC value is changed in amd_pstate_cpu_boost_update()
|
||||
but then changed again when refresh_frequency_limits() and all its callbacks
|
||||
occur. The first is a pointless write, so instead just update the limits for
|
||||
the policy and let the policy refresh anchor everything properly.
|
||||
|
||||
Fixes: c8c68c38b56f ("cpufreq: amd-pstate: initialize core precision boost state")
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 24 +-----------------------
|
||||
1 file changed, 1 insertion(+), 23 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -695,34 +695,12 @@ static void amd_pstate_adjust_perf(unsig
|
||||
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
- struct cppc_perf_ctrls perf_ctrls;
|
||||
- u32 highest_perf, nominal_perf, nominal_freq, max_freq;
|
||||
+ u32 nominal_freq, max_freq;
|
||||
int ret = 0;
|
||||
|
||||
- highest_perf = READ_ONCE(cpudata->highest_perf);
|
||||
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
nominal_freq = READ_ONCE(cpudata->nominal_freq);
|
||||
max_freq = READ_ONCE(cpudata->max_freq);
|
||||
|
||||
- if (boot_cpu_has(X86_FEATURE_CPPC)) {
|
||||
- u64 value = READ_ONCE(cpudata->cppc_req_cached);
|
||||
-
|
||||
- value &= ~GENMASK_ULL(7, 0);
|
||||
- value |= on ? highest_perf : nominal_perf;
|
||||
- WRITE_ONCE(cpudata->cppc_req_cached, value);
|
||||
-
|
||||
- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
|
||||
- } else {
|
||||
- perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
|
||||
- ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
|
||||
- if (ret) {
|
||||
- cpufreq_cpu_release(policy);
|
||||
- pr_debug("Failed to set max perf on CPU:%d. ret:%d\n",
|
||||
- cpudata->cpu, ret);
|
||||
- return ret;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
if (on)
|
||||
policy->cpuinfo.max_freq = max_freq;
|
||||
else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
|
@ -0,0 +1,49 @@
|
||||
From 684d162c08ab86fff02861c907ecc92bf9c09af4 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sat, 12 Oct 2024 12:45:18 -0500
|
||||
Subject: cpufreq/amd-pstate: Use amd_pstate_update_min_max_limit() for EPP
|
||||
limits
|
||||
|
||||
When the EPP updates are set the maximum capable frequency for the
|
||||
CPU is used to set the upper limit instead of that of the policy.
|
||||
|
||||
Adjust amd_pstate_epp_update_limit() to reuse policy calculation code
|
||||
from amd_pstate_update_min_max_limit().
|
||||
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 19 +++----------------
|
||||
1 file changed, 3 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1505,26 +1505,13 @@ static void amd_pstate_epp_cpu_exit(stru
|
||||
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
- u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
|
||||
+ u32 max_perf, min_perf;
|
||||
u64 value;
|
||||
s16 epp;
|
||||
|
||||
- if (cpudata->boost_supported && !policy->boost_enabled)
|
||||
- max_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
- else
|
||||
- max_perf = READ_ONCE(cpudata->highest_perf);
|
||||
+ max_perf = READ_ONCE(cpudata->highest_perf);
|
||||
min_perf = READ_ONCE(cpudata->lowest_perf);
|
||||
- max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
|
||||
- min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);
|
||||
-
|
||||
- if (min_limit_perf < min_perf)
|
||||
- min_limit_perf = min_perf;
|
||||
-
|
||||
- if (max_limit_perf < min_limit_perf)
|
||||
- max_limit_perf = min_limit_perf;
|
||||
-
|
||||
- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
|
||||
- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
|
||||
+ amd_pstate_update_min_max_limit(policy);
|
||||
|
||||
max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
|
||||
cpudata->max_limit_perf);
|
@ -0,0 +1,29 @@
|
||||
From fa46d2873c9fa4060ce407e4bc5c7e29babce9d0 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Sat, 12 Oct 2024 12:45:19 -0500
|
||||
Subject: cpufreq/amd-pstate: Drop needless EPP initialization
|
||||
|
||||
The EPP value doesn't need to be cached to the CPPC request in
|
||||
amd_pstate_epp_update_limit() because it's passed as an argument
|
||||
at the end to amd_pstate_set_epp() and stored at that time.
|
||||
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 6 ------
|
||||
1 file changed, 6 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1548,12 +1548,6 @@ static int amd_pstate_epp_update_limit(s
|
||||
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
|
||||
epp = 0;
|
||||
|
||||
- /* Set initial EPP value */
|
||||
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
- value &= ~GENMASK_ULL(31, 24);
|
||||
- value |= (u64)epp << 24;
|
||||
- }
|
||||
-
|
||||
WRITE_ONCE(cpudata->cppc_req_cached, value);
|
||||
return amd_pstate_set_epp(cpudata, epp);
|
||||
}
|
@ -0,0 +1,228 @@
|
||||
From 649d296be0c7f0df6e71b4fca25fdbe75cb3994e Mon Sep 17 00:00:00 2001
|
||||
From: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Date: Thu, 17 Oct 2024 17:03:11 +0200
|
||||
Subject: amd-pstate-6.11: update setting the minimum frequency to
|
||||
lowest_nonlinear_freq patchset to v3
|
||||
|
||||
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++---------------
|
||||
drivers/cpufreq/amd-pstate.h | 4 +--
|
||||
drivers/cpufreq/cpufreq.c | 6 +---
|
||||
include/linux/cpufreq.h | 6 ----
|
||||
4 files changed, 43 insertions(+), 40 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -557,9 +557,28 @@ cpufreq_policy_put:
|
||||
cpufreq_cpu_put(policy);
|
||||
}
|
||||
|
||||
-static int amd_pstate_verify(struct cpufreq_policy_data *policy)
|
||||
+static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
|
||||
{
|
||||
- cpufreq_verify_within_cpu_limits(policy);
|
||||
+ /*
|
||||
+ * Initialize lower frequency limit (i.e.policy->min) with
|
||||
+ * lowest_nonlinear_frequency which is the most energy efficient
|
||||
+ * frequency. Override the initial value set by cpufreq core and
|
||||
+ * amd-pstate qos_requests.
|
||||
+ */
|
||||
+ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
|
||||
+ struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
|
||||
+ struct amd_cpudata *cpudata;
|
||||
+
|
||||
+ if (!policy)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ cpudata = policy->driver_data;
|
||||
+ policy_data->min = cpudata->lowest_nonlinear_freq;
|
||||
+ cpufreq_cpu_put(policy);
|
||||
+ }
|
||||
+
|
||||
+ cpufreq_verify_within_cpu_limits(policy_data);
|
||||
+ pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -709,7 +728,7 @@ static int amd_pstate_cpu_boost_update(s
|
||||
policy->max = policy->cpuinfo.max_freq;
|
||||
|
||||
if (cppc_state == AMD_PSTATE_PASSIVE) {
|
||||
- ret = freq_qos_update_request(&cpudata->max_freq_req, policy->cpuinfo.max_freq);
|
||||
+ ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
|
||||
if (ret < 0)
|
||||
pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
|
||||
}
|
||||
@@ -976,17 +995,17 @@ static int amd_pstate_cpu_init(struct cp
|
||||
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
- goto free_cpudata;
|
||||
+ goto free_cpudata1;
|
||||
|
||||
amd_pstate_init_prefcore(cpudata);
|
||||
|
||||
ret = amd_pstate_init_freq(cpudata);
|
||||
if (ret)
|
||||
- goto free_cpudata;
|
||||
+ goto free_cpudata1;
|
||||
|
||||
ret = amd_pstate_init_boost_support(cpudata);
|
||||
if (ret)
|
||||
- goto free_cpudata;
|
||||
+ goto free_cpudata1;
|
||||
|
||||
min_freq = READ_ONCE(cpudata->min_freq);
|
||||
max_freq = READ_ONCE(cpudata->max_freq);
|
||||
@@ -1008,11 +1027,18 @@ static int amd_pstate_cpu_init(struct cp
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC))
|
||||
policy->fast_switch_possible = true;
|
||||
|
||||
- ret = freq_qos_add_request(&policy->constraints, &cpudata->max_freq_req,
|
||||
+ ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
|
||||
+ FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
|
||||
+ if (ret < 0) {
|
||||
+ dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
|
||||
+ goto free_cpudata1;
|
||||
+ }
|
||||
+
|
||||
+ ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
|
||||
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
|
||||
- goto free_cpudata;
|
||||
+ goto free_cpudata2;
|
||||
}
|
||||
|
||||
cpudata->max_limit_freq = max_freq;
|
||||
@@ -1025,7 +1051,9 @@ static int amd_pstate_cpu_init(struct cp
|
||||
|
||||
return 0;
|
||||
|
||||
-free_cpudata:
|
||||
+free_cpudata2:
|
||||
+ freq_qos_remove_request(&cpudata->req[0]);
|
||||
+free_cpudata1:
|
||||
kfree(cpudata);
|
||||
return ret;
|
||||
}
|
||||
@@ -1034,7 +1062,8 @@ static void amd_pstate_cpu_exit(struct c
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
|
||||
- freq_qos_remove_request(&cpudata->max_freq_req);
|
||||
+ freq_qos_remove_request(&cpudata->req[1]);
|
||||
+ freq_qos_remove_request(&cpudata->req[0]);
|
||||
policy->fast_switch_possible = false;
|
||||
kfree(cpudata);
|
||||
}
|
||||
@@ -1658,13 +1687,6 @@ static int amd_pstate_epp_cpu_offline(st
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
|
||||
-{
|
||||
- cpufreq_verify_within_cpu_limits(policy);
|
||||
- pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
@@ -1703,13 +1725,6 @@ static int amd_pstate_epp_resume(struct
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int amd_pstate_get_init_min_freq(struct cpufreq_policy *policy)
|
||||
-{
|
||||
- struct amd_cpudata *cpudata = policy->driver_data;
|
||||
-
|
||||
- return READ_ONCE(cpudata->lowest_nonlinear_freq);
|
||||
-}
|
||||
-
|
||||
static struct cpufreq_driver amd_pstate_driver = {
|
||||
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
|
||||
.verify = amd_pstate_verify,
|
||||
@@ -1723,12 +1738,11 @@ static struct cpufreq_driver amd_pstate_
|
||||
.update_limits = amd_pstate_update_limits,
|
||||
.name = "amd-pstate",
|
||||
.attr = amd_pstate_attr,
|
||||
- .get_init_min_freq = amd_pstate_get_init_min_freq,
|
||||
};
|
||||
|
||||
static struct cpufreq_driver amd_pstate_epp_driver = {
|
||||
.flags = CPUFREQ_CONST_LOOPS,
|
||||
- .verify = amd_pstate_epp_verify_policy,
|
||||
+ .verify = amd_pstate_verify,
|
||||
.setpolicy = amd_pstate_epp_set_policy,
|
||||
.init = amd_pstate_epp_cpu_init,
|
||||
.exit = amd_pstate_epp_cpu_exit,
|
||||
@@ -1740,7 +1754,6 @@ static struct cpufreq_driver amd_pstate_
|
||||
.set_boost = amd_pstate_set_boost,
|
||||
.name = "amd-pstate-epp",
|
||||
.attr = amd_pstate_epp_attr,
|
||||
- .get_init_min_freq = amd_pstate_get_init_min_freq,
|
||||
};
|
||||
|
||||
static int __init amd_pstate_set_driver(int mode_idx)
|
||||
--- a/drivers/cpufreq/amd-pstate.h
|
||||
+++ b/drivers/cpufreq/amd-pstate.h
|
||||
@@ -28,7 +28,7 @@ struct amd_aperf_mperf {
|
||||
/**
|
||||
* struct amd_cpudata - private CPU data for AMD P-State
|
||||
* @cpu: CPU number
|
||||
- * @max_freq_req: maximum frequency constraint request to apply
|
||||
+ * @req: constraint request to apply
|
||||
* @cppc_req_cached: cached performance request hints
|
||||
* @highest_perf: the maximum performance an individual processor may reach,
|
||||
* assuming ideal conditions
|
||||
@@ -68,7 +68,7 @@ struct amd_aperf_mperf {
|
||||
struct amd_cpudata {
|
||||
int cpu;
|
||||
|
||||
- struct freq_qos_request max_freq_req;
|
||||
+ struct freq_qos_request req[2];
|
||||
u64 cppc_req_cached;
|
||||
|
||||
u32 highest_perf;
|
||||
--- a/drivers/cpufreq/cpufreq.c
|
||||
+++ b/drivers/cpufreq/cpufreq.c
|
||||
@@ -1380,7 +1380,6 @@ static int cpufreq_online(unsigned int c
|
||||
bool new_policy;
|
||||
unsigned long flags;
|
||||
unsigned int j;
|
||||
- u32 init_min_freq = FREQ_QOS_MIN_DEFAULT_VALUE;
|
||||
int ret;
|
||||
|
||||
pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
|
||||
@@ -1465,12 +1464,9 @@ static int cpufreq_online(unsigned int c
|
||||
goto out_destroy_policy;
|
||||
}
|
||||
|
||||
- if (cpufreq_driver->get_init_min_freq)
|
||||
- init_min_freq = cpufreq_driver->get_init_min_freq(policy);
|
||||
-
|
||||
ret = freq_qos_add_request(&policy->constraints,
|
||||
policy->min_freq_req, FREQ_QOS_MIN,
|
||||
- init_min_freq);
|
||||
+ FREQ_QOS_MIN_DEFAULT_VALUE);
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* So we don't call freq_qos_remove_request() for an
|
||||
--- a/include/linux/cpufreq.h
|
||||
+++ b/include/linux/cpufreq.h
|
||||
@@ -414,12 +414,6 @@ struct cpufreq_driver {
|
||||
* policy is properly initialized, but before the governor is started.
|
||||
*/
|
||||
void (*register_em)(struct cpufreq_policy *policy);
|
||||
-
|
||||
- /*
|
||||
- * Set by drivers that want to initialize the policy->min_freq_req with
|
||||
- * a value different from the default value (0) in cpufreq core.
|
||||
- */
|
||||
- int (*get_init_min_freq)(struct cpufreq_policy *policy);
|
||||
};
|
||||
|
||||
/* flags */
|
@ -0,0 +1,44 @@
|
||||
From db147a0a6341822a15fd9c4cd51f8dc4a9a1747b Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 17 Oct 2024 10:05:27 +0000
|
||||
Subject: cpufreq/amd-pstate: Call amd_pstate_register() in amd_pstate_init()
|
||||
|
||||
Replace a similar chunk of code in amd_pstate_init() with
|
||||
amd_pstate_register() call.
|
||||
|
||||
Suggested-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 12 ++----------
|
||||
1 file changed, 2 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1909,17 +1909,10 @@ static int __init amd_pstate_init(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
- /* enable amd pstate feature */
|
||||
- ret = amd_pstate_enable(true);
|
||||
- if (ret) {
|
||||
- pr_err("failed to enable driver mode(%d)\n", cppc_state);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
- ret = cpufreq_register_driver(current_pstate_driver);
|
||||
+ ret = amd_pstate_register_driver(cppc_state);
|
||||
if (ret) {
|
||||
pr_err("failed to register with return %d\n", ret);
|
||||
- goto disable_driver;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
dev_root = bus_get_dev_root(&cpu_subsys);
|
||||
@@ -1936,7 +1929,6 @@ static int __init amd_pstate_init(void)
|
||||
|
||||
global_attr_free:
|
||||
cpufreq_unregister_driver(current_pstate_driver);
|
||||
-disable_driver:
|
||||
amd_pstate_enable(false);
|
||||
return ret;
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
From 7c658490b05f6ab4dd59e1c25e75ba1037f6cfeb Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 17 Oct 2024 10:05:29 +0000
|
||||
Subject: cpufreq/amd-pstate: Call amd_pstate_set_driver() in
|
||||
amd_pstate_register_driver()
|
||||
|
||||
Replace a similar chunk of code in amd_pstate_register_driver() with
|
||||
amd_pstate_set_driver() call.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 47 +++++++++++++++++-------------------
|
||||
1 file changed, 22 insertions(+), 25 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1221,16 +1221,32 @@ static void amd_pstate_driver_cleanup(vo
|
||||
current_pstate_driver = NULL;
|
||||
}
|
||||
|
||||
+static int amd_pstate_set_driver(int mode_idx)
|
||||
+{
|
||||
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
|
||||
+ cppc_state = mode_idx;
|
||||
+ if (cppc_state == AMD_PSTATE_DISABLE)
|
||||
+ pr_info("driver is explicitly disabled\n");
|
||||
+
|
||||
+ if (cppc_state == AMD_PSTATE_ACTIVE)
|
||||
+ current_pstate_driver = &amd_pstate_epp_driver;
|
||||
+
|
||||
+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
|
||||
+ current_pstate_driver = &amd_pstate_driver;
|
||||
+
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ return -EINVAL;
|
||||
+}
|
||||
+
|
||||
static int amd_pstate_register_driver(int mode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
- if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
|
||||
- current_pstate_driver = &amd_pstate_driver;
|
||||
- else if (mode == AMD_PSTATE_ACTIVE)
|
||||
- current_pstate_driver = &amd_pstate_epp_driver;
|
||||
- else
|
||||
- return -EINVAL;
|
||||
+ ret = amd_pstate_set_driver(mode);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
|
||||
cppc_state = mode;
|
||||
|
||||
@@ -1756,25 +1772,6 @@ static struct cpufreq_driver amd_pstate_
|
||||
.attr = amd_pstate_epp_attr,
|
||||
};
|
||||
|
||||
-static int __init amd_pstate_set_driver(int mode_idx)
|
||||
-{
|
||||
- if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
|
||||
- cppc_state = mode_idx;
|
||||
- if (cppc_state == AMD_PSTATE_DISABLE)
|
||||
- pr_info("driver is explicitly disabled\n");
|
||||
-
|
||||
- if (cppc_state == AMD_PSTATE_ACTIVE)
|
||||
- current_pstate_driver = &amd_pstate_epp_driver;
|
||||
-
|
||||
- if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
|
||||
- current_pstate_driver = &amd_pstate_driver;
|
||||
-
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- return -EINVAL;
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* CPPC function is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
|
||||
* show the debug message that helps to check if the CPU has CPPC support for loading issue.
|
@ -0,0 +1,41 @@
|
||||
From 55be5db97f4f52badc958463ee8d9cbc2ae91615 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 17 Oct 2024 10:05:31 +0000
|
||||
Subject: cpufreq/amd-pstate: Remove the switch case in amd_pstate_init()
|
||||
|
||||
Replace the switch case with a more readable if condition.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 16 +++++-----------
|
||||
1 file changed, 5 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1873,21 +1873,15 @@ static int __init amd_pstate_init(void)
|
||||
cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
|
||||
}
|
||||
|
||||
- switch (cppc_state) {
|
||||
- case AMD_PSTATE_DISABLE:
|
||||
+ if (cppc_state == AMD_PSTATE_DISABLE) {
|
||||
pr_info("driver load is disabled, boot with specific mode to enable this\n");
|
||||
return -ENODEV;
|
||||
- case AMD_PSTATE_PASSIVE:
|
||||
- case AMD_PSTATE_ACTIVE:
|
||||
- case AMD_PSTATE_GUIDED:
|
||||
- ret = amd_pstate_set_driver(cppc_state);
|
||||
- if (ret)
|
||||
- return ret;
|
||||
- break;
|
||||
- default:
|
||||
- return -EINVAL;
|
||||
}
|
||||
|
||||
+ ret = amd_pstate_set_driver(cppc_state);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
/* capability check */
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
pr_debug("AMD CPPC MSR based functionality is supported\n");
|
@ -0,0 +1,43 @@
|
||||
From 7305364888151cb9e6b435c5f219ccfd18132b58 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Thu, 17 Oct 2024 10:05:33 +0000
|
||||
Subject: cpufreq/amd-pstate: Remove the redundant amd_pstate_set_driver() call
|
||||
|
||||
amd_pstate_set_driver() is called twice, once in amd_pstate_init() and once
|
||||
as part of amd_pstate_register_driver(). Move around code and eliminate
|
||||
the redundancy.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 12 ++++--------
|
||||
1 file changed, 4 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1878,9 +1878,11 @@ static int __init amd_pstate_init(void)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
- ret = amd_pstate_set_driver(cppc_state);
|
||||
- if (ret)
|
||||
+ ret = amd_pstate_register_driver(cppc_state);
|
||||
+ if (ret) {
|
||||
+ pr_err("failed to register with return %d\n", ret);
|
||||
return ret;
|
||||
+ }
|
||||
|
||||
/* capability check */
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
@@ -1900,12 +1902,6 @@ static int __init amd_pstate_init(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
- ret = amd_pstate_register_driver(cppc_state);
|
||||
- if (ret) {
|
||||
- pr_err("failed to register with return %d\n", ret);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
dev_root = bus_get_dev_root(&cpu_subsys);
|
||||
if (dev_root) {
|
||||
ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
|
@ -0,0 +1,33 @@
|
||||
From 5886ef269d069c72ea952cb00699e16221289e8c Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Thu, 17 Oct 2024 12:34:39 -0500
|
||||
Subject: cpufreq/amd-pstate-ut: Add fix for min freq unit test
|
||||
|
||||
commit 642aff3964b0f ("cpufreq/amd-pstate: Set the initial min_freq to
|
||||
lowest_nonlinear_freq") changed the initial minimum frequency to lowest
|
||||
nonlinear frequency, but the unit tests weren't updated and now fail.
|
||||
|
||||
Update them to match this same change.
|
||||
|
||||
Fixes: 642aff3964b0f ("cpufreq/amd-pstate: Set the initial min_freq to lowest_nonlinear_freq")
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate-ut.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate-ut.c
|
||||
+++ b/drivers/cpufreq/amd-pstate-ut.c
|
||||
@@ -227,10 +227,10 @@ static void amd_pstate_ut_check_freq(u32
|
||||
goto skip_test;
|
||||
}
|
||||
|
||||
- if (cpudata->min_freq != policy->min) {
|
||||
+ if (cpudata->lowest_nonlinear_freq != policy->min) {
|
||||
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
|
||||
- pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
|
||||
- __func__, cpu, cpudata->min_freq, policy->min);
|
||||
+ pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
|
||||
+ __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
|
||||
goto skip_test;
|
||||
}
|
||||
|
@ -0,0 +1,33 @@
|
||||
From e82b9b5a56bcac18cae68878fe67263279805735 Mon Sep 17 00:00:00 2001
|
||||
From: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
|
||||
Date: Mon, 21 Oct 2024 15:48:35 +0530
|
||||
Subject: amd-pstate: Set min_perf to nominal_perf for active mode performance
|
||||
gov
|
||||
|
||||
The amd-pstate driver sets CPPC_REQ.min_perf to CPPC_REQ.max_perf when
|
||||
in active mode with performance governor. Typically CPPC_REQ.max_perf
|
||||
is set to CPPC.highest_perf. This causes frequency throttling on
|
||||
power-limited platforms which causes performance regressions on
|
||||
certain classes of workloads.
|
||||
|
||||
Hence, set the CPPC_REQ.min_perf to the CPPC.nominal_perf or
|
||||
CPPC_REQ.max_perf, whichever is lower of the two.
|
||||
|
||||
Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors")
|
||||
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1565,7 +1565,7 @@ static int amd_pstate_epp_update_limit(s
|
||||
value = READ_ONCE(cpudata->cppc_req_cached);
|
||||
|
||||
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
|
||||
- min_perf = max_perf;
|
||||
+ min_perf = min(cpudata->nominal_perf, max_perf);
|
||||
|
||||
/* Initial min/max values for CPPC Performance Controls Register */
|
||||
value &= ~AMD_CPPC_MIN_PERF(~0L);
|
@ -0,0 +1,44 @@
|
||||
From 497447cf96a785a4edd0756da5d5718037f5687c Mon Sep 17 00:00:00 2001
|
||||
From: Swapnil Sapkal <swapnil.sapkal@amd.com>
|
||||
Date: Mon, 21 Oct 2024 15:48:36 +0530
|
||||
Subject: amd-pstate: Switch to amd-pstate by default on some Server platforms
|
||||
|
||||
Currently the default cpufreq driver for all the AMD EPYC servers is
|
||||
acpi-cpufreq. Going forward, switch to amd-pstate as the default
|
||||
driver on the AMD EPYC server platforms with CPU family 0x1A or
|
||||
higher. The default mode will be active mode.
|
||||
|
||||
Testing shows that amd-pstate with active mode and performance
|
||||
governor provides comparable or better performance per-watt against
|
||||
acpi-cpufreq + performance governor.
|
||||
|
||||
Likewise, amd-pstate with active mode and powersave governor with the
|
||||
energy_performance_preference=power (EPP=255) provides comparable or
|
||||
better performance per-watt against acpi-cpufreq + schedutil governor
|
||||
for a wide range of workloads.
|
||||
|
||||
Users can still revert to using acpi-cpufreq driver on these platforms
|
||||
with the "amd_pstate=disable" kernel commandline parameter.
|
||||
|
||||
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
|
||||
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
|
||||
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1862,10 +1862,10 @@ static int __init amd_pstate_init(void)
|
||||
if (cppc_state == AMD_PSTATE_UNDEFINED) {
|
||||
/* Disable on the following configs by default:
|
||||
* 1. Undefined platforms
|
||||
- * 2. Server platforms
|
||||
+ * 2. Server platforms with CPUs older than Family 0x1A.
|
||||
*/
|
||||
if (amd_pstate_acpi_pm_profile_undefined() ||
|
||||
- amd_pstate_acpi_pm_profile_server()) {
|
||||
+ (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
|
||||
pr_info("driver load is disabled, boot with specific mode to enable this\n");
|
||||
return -ENODEV;
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
From a4d255935a1ea6e4b10167df942ec641079bcdf7 Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Mon, 28 Oct 2024 09:55:41 -0500
|
||||
Subject: cpufreq/amd-pstate: Push adjust_perf vfunc init into cpu_init
|
||||
|
||||
As the driver can be changed in and out of different modes it's possible
|
||||
that adjust_perf is assigned when it shouldn't be.
|
||||
|
||||
This could happen if an MSR design is started up in passive mode and then
|
||||
switches to active mode.
|
||||
|
||||
To solve this explicitly clear `adjust_perf` in amd_pstate_epp_cpu_init().
|
||||
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1528,6 +1528,8 @@ static int amd_pstate_epp_cpu_init(struc
|
||||
WRITE_ONCE(cpudata->cppc_cap1_cached, value);
|
||||
}
|
||||
|
||||
+ current_pstate_driver->adjust_perf = NULL;
|
||||
+
|
||||
return 0;
|
||||
|
||||
free_cpudata1:
|
||||
@@ -1887,8 +1889,6 @@ static int __init amd_pstate_init(void)
|
||||
/* capability check */
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
pr_debug("AMD CPPC MSR based functionality is supported\n");
|
||||
- if (cppc_state != AMD_PSTATE_ACTIVE)
|
||||
- current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
|
||||
} else {
|
||||
pr_debug("AMD CPPC shared memory based functionality is supported\n");
|
||||
static_call_update(amd_pstate_enable, shmem_enable);
|
@ -0,0 +1,47 @@
|
||||
From c42a82a583646dcbba8500d47ed878616ab5c33a Mon Sep 17 00:00:00 2001
|
||||
From: Mario Limonciello <mario.limonciello@amd.com>
|
||||
Date: Mon, 28 Oct 2024 09:55:42 -0500
|
||||
Subject: cpufreq/amd-pstate: Move registration after static function call
|
||||
update
|
||||
|
||||
On shared memory designs the static functions need to work before
|
||||
registration is done or the system can hang at bootup.
|
||||
|
||||
Move the registration later in amd_pstate_init() to solve this.
|
||||
|
||||
Fixes: e238968a2087 ("cpufreq/amd-pstate: Remove the redundant amd_pstate_set_driver() call")
|
||||
Reported-by: Klara Modin <klarasmodin@gmail.com>
|
||||
Closes: https://lore.kernel.org/linux-pm/cf9c146d-bacf-444e-92e2-15ebf513af96@gmail.com/#t
|
||||
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
|
||||
---
|
||||
drivers/cpufreq/amd-pstate.c | 12 ++++++------
|
||||
1 file changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/amd-pstate.c
|
||||
+++ b/drivers/cpufreq/amd-pstate.c
|
||||
@@ -1880,12 +1880,6 @@ static int __init amd_pstate_init(void)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
- ret = amd_pstate_register_driver(cppc_state);
|
||||
- if (ret) {
|
||||
- pr_err("failed to register with return %d\n", ret);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
/* capability check */
|
||||
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
|
||||
pr_debug("AMD CPPC MSR based functionality is supported\n");
|
||||
@@ -1896,6 +1890,12 @@ static int __init amd_pstate_init(void)
|
||||
static_call_update(amd_pstate_update_perf, shmem_update_perf);
|
||||
}
|
||||
|
||||
+ ret = amd_pstate_register_driver(cppc_state);
|
||||
+ if (ret) {
|
||||
+ pr_err("failed to register with return %d\n", ret);
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
if (amd_pstate_prefcore) {
|
||||
ret = amd_detect_prefcore(&amd_pstate_prefcore);
|
||||
if (ret)
|
321
debian/patches/patchset-pf/amd-rapl/0001-perf-Generic-hotplug-support-for-a-PMU-with-a-scope.patch
vendored
Normal file
321
debian/patches/patchset-pf/amd-rapl/0001-perf-Generic-hotplug-support-for-a-PMU-with-a-scope.patch
vendored
Normal file
@ -0,0 +1,321 @@
|
||||
From 023d6b8aa8d8b346cfdcccf5ca4cb880c8d41d87 Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:37 -0700
|
||||
Subject: perf: Generic hotplug support for a PMU with a scope
|
||||
|
||||
The perf subsystem assumes that the counters of a PMU are per-CPU. So
|
||||
the user space tool reads a counter from each CPU in the system wide
|
||||
mode. However, many PMUs don't have a per-CPU counter. The counter is
|
||||
effective for a scope, e.g., a die or a socket. To address this, a
|
||||
cpumask is exposed by the kernel driver to restrict to one CPU to stand
|
||||
for a specific scope. In case the given CPU is removed,
|
||||
the hotplug support has to be implemented for each such driver.
|
||||
|
||||
The code to support the cpumask and hotplug is very similar.
|
||||
- Expose a cpumask into sysfs
|
||||
- Pickup another CPU in the same scope if the given CPU is removed.
|
||||
- Invoke the perf_pmu_migrate_context() to migrate to a new CPU.
|
||||
- In event init, always set the CPU in the cpumask to event->cpu
|
||||
|
||||
Similar duplicated code is implemented for each such PMU driver. It
|
||||
would be good to introduce a generic infrastructure to avoid such
|
||||
duplication.
|
||||
|
||||
5 popular scopes are implemented here, core, die, cluster, pkg, and
|
||||
the system-wide. The scope can be set when a PMU is registered. If so, a
|
||||
"cpumask" is automatically exposed for the PMU.
|
||||
|
||||
The "cpumask" is from the perf_online_<scope>_mask, which is to track
|
||||
the active CPU for each scope. They are set when the first CPU of the
|
||||
scope is online via the generic perf hotplug support. When a
|
||||
corresponding CPU is removed, the perf_online_<scope>_mask is updated
|
||||
accordingly and the PMU will be moved to a new CPU from the same scope
|
||||
if possible.
|
||||
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
---
|
||||
include/linux/perf_event.h | 18 ++++
|
||||
kernel/events/core.c | 164 ++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 180 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/perf_event.h
|
||||
+++ b/include/linux/perf_event.h
|
||||
@@ -292,6 +292,19 @@ struct perf_event_pmu_context;
|
||||
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
|
||||
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
|
||||
|
||||
+/**
|
||||
+ * pmu::scope
|
||||
+ */
|
||||
+enum perf_pmu_scope {
|
||||
+ PERF_PMU_SCOPE_NONE = 0,
|
||||
+ PERF_PMU_SCOPE_CORE,
|
||||
+ PERF_PMU_SCOPE_DIE,
|
||||
+ PERF_PMU_SCOPE_CLUSTER,
|
||||
+ PERF_PMU_SCOPE_PKG,
|
||||
+ PERF_PMU_SCOPE_SYS_WIDE,
|
||||
+ PERF_PMU_MAX_SCOPE,
|
||||
+};
|
||||
+
|
||||
struct perf_output_handle;
|
||||
|
||||
#define PMU_NULL_DEV ((void *)(~0UL))
|
||||
@@ -315,6 +328,11 @@ struct pmu {
|
||||
*/
|
||||
int capabilities;
|
||||
|
||||
+ /*
|
||||
+ * PMU scope
|
||||
+ */
|
||||
+ unsigned int scope;
|
||||
+
|
||||
int __percpu *pmu_disable_count;
|
||||
struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
|
||||
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
|
||||
--- a/kernel/events/core.c
|
||||
+++ b/kernel/events/core.c
|
||||
@@ -411,6 +411,11 @@ static LIST_HEAD(pmus);
|
||||
static DEFINE_MUTEX(pmus_lock);
|
||||
static struct srcu_struct pmus_srcu;
|
||||
static cpumask_var_t perf_online_mask;
|
||||
+static cpumask_var_t perf_online_core_mask;
|
||||
+static cpumask_var_t perf_online_die_mask;
|
||||
+static cpumask_var_t perf_online_cluster_mask;
|
||||
+static cpumask_var_t perf_online_pkg_mask;
|
||||
+static cpumask_var_t perf_online_sys_mask;
|
||||
static struct kmem_cache *perf_event_cache;
|
||||
|
||||
/*
|
||||
@@ -11497,10 +11502,60 @@ perf_event_mux_interval_ms_store(struct
|
||||
}
|
||||
static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
|
||||
|
||||
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu)
|
||||
+{
|
||||
+ switch (scope) {
|
||||
+ case PERF_PMU_SCOPE_CORE:
|
||||
+ return topology_sibling_cpumask(cpu);
|
||||
+ case PERF_PMU_SCOPE_DIE:
|
||||
+ return topology_die_cpumask(cpu);
|
||||
+ case PERF_PMU_SCOPE_CLUSTER:
|
||||
+ return topology_cluster_cpumask(cpu);
|
||||
+ case PERF_PMU_SCOPE_PKG:
|
||||
+ return topology_core_cpumask(cpu);
|
||||
+ case PERF_PMU_SCOPE_SYS_WIDE:
|
||||
+ return cpu_online_mask;
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static inline struct cpumask *perf_scope_cpumask(unsigned int scope)
|
||||
+{
|
||||
+ switch (scope) {
|
||||
+ case PERF_PMU_SCOPE_CORE:
|
||||
+ return perf_online_core_mask;
|
||||
+ case PERF_PMU_SCOPE_DIE:
|
||||
+ return perf_online_die_mask;
|
||||
+ case PERF_PMU_SCOPE_CLUSTER:
|
||||
+ return perf_online_cluster_mask;
|
||||
+ case PERF_PMU_SCOPE_PKG:
|
||||
+ return perf_online_pkg_mask;
|
||||
+ case PERF_PMU_SCOPE_SYS_WIDE:
|
||||
+ return perf_online_sys_mask;
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
|
||||
+ char *buf)
|
||||
+{
|
||||
+ struct pmu *pmu = dev_get_drvdata(dev);
|
||||
+ struct cpumask *mask = perf_scope_cpumask(pmu->scope);
|
||||
+
|
||||
+ if (mask)
|
||||
+ return cpumap_print_to_pagebuf(true, buf, mask);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static DEVICE_ATTR_RO(cpumask);
|
||||
+
|
||||
static struct attribute *pmu_dev_attrs[] = {
|
||||
&dev_attr_type.attr,
|
||||
&dev_attr_perf_event_mux_interval_ms.attr,
|
||||
&dev_attr_nr_addr_filters.attr,
|
||||
+ &dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -11512,6 +11567,10 @@ static umode_t pmu_dev_is_visible(struct
|
||||
if (n == 2 && !pmu->nr_addr_filters)
|
||||
return 0;
|
||||
|
||||
+ /* cpumask */
|
||||
+ if (n == 3 && pmu->scope == PERF_PMU_SCOPE_NONE)
|
||||
+ return 0;
|
||||
+
|
||||
return a->mode;
|
||||
}
|
||||
|
||||
@@ -11596,6 +11655,11 @@ int perf_pmu_register(struct pmu *pmu, c
|
||||
goto free_pdc;
|
||||
}
|
||||
|
||||
+ if (WARN_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE, "Can not register a pmu with an invalid scope.\n")) {
|
||||
+ ret = -EINVAL;
|
||||
+ goto free_pdc;
|
||||
+ }
|
||||
+
|
||||
pmu->name = name;
|
||||
|
||||
if (type >= 0)
|
||||
@@ -11750,6 +11814,22 @@ static int perf_try_init_event(struct pm
|
||||
event_has_any_exclude_flag(event))
|
||||
ret = -EINVAL;
|
||||
|
||||
+ if (pmu->scope != PERF_PMU_SCOPE_NONE && event->cpu >= 0) {
|
||||
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(pmu->scope, event->cpu);
|
||||
+ struct cpumask *pmu_cpumask = perf_scope_cpumask(pmu->scope);
|
||||
+ int cpu;
|
||||
+
|
||||
+ if (pmu_cpumask && cpumask) {
|
||||
+ cpu = cpumask_any_and(pmu_cpumask, cpumask);
|
||||
+ if (cpu >= nr_cpu_ids)
|
||||
+ ret = -ENODEV;
|
||||
+ else
|
||||
+ event->cpu = cpu;
|
||||
+ } else {
|
||||
+ ret = -ENODEV;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (ret && event->destroy)
|
||||
event->destroy(event);
|
||||
}
|
||||
@@ -13713,6 +13793,12 @@ static void __init perf_event_init_all_c
|
||||
int cpu;
|
||||
|
||||
zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
|
||||
+ zalloc_cpumask_var(&perf_online_core_mask, GFP_KERNEL);
|
||||
+ zalloc_cpumask_var(&perf_online_die_mask, GFP_KERNEL);
|
||||
+ zalloc_cpumask_var(&perf_online_cluster_mask, GFP_KERNEL);
|
||||
+ zalloc_cpumask_var(&perf_online_pkg_mask, GFP_KERNEL);
|
||||
+ zalloc_cpumask_var(&perf_online_sys_mask, GFP_KERNEL);
|
||||
+
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
swhash = &per_cpu(swevent_htable, cpu);
|
||||
@@ -13762,6 +13848,40 @@ static void __perf_event_exit_context(vo
|
||||
raw_spin_unlock(&ctx->lock);
|
||||
}
|
||||
|
||||
+static void perf_event_clear_cpumask(unsigned int cpu)
|
||||
+{
|
||||
+ int target[PERF_PMU_MAX_SCOPE];
|
||||
+ unsigned int scope;
|
||||
+ struct pmu *pmu;
|
||||
+
|
||||
+ cpumask_clear_cpu(cpu, perf_online_mask);
|
||||
+
|
||||
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
|
||||
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(scope, cpu);
|
||||
+ struct cpumask *pmu_cpumask = perf_scope_cpumask(scope);
|
||||
+
|
||||
+ target[scope] = -1;
|
||||
+ if (WARN_ON_ONCE(!pmu_cpumask || !cpumask))
|
||||
+ continue;
|
||||
+
|
||||
+ if (!cpumask_test_and_clear_cpu(cpu, pmu_cpumask))
|
||||
+ continue;
|
||||
+ target[scope] = cpumask_any_but(cpumask, cpu);
|
||||
+ if (target[scope] < nr_cpu_ids)
|
||||
+ cpumask_set_cpu(target[scope], pmu_cpumask);
|
||||
+ }
|
||||
+
|
||||
+ /* migrate */
|
||||
+ list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
|
||||
+ if (pmu->scope == PERF_PMU_SCOPE_NONE ||
|
||||
+ WARN_ON_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE))
|
||||
+ continue;
|
||||
+
|
||||
+ if (target[pmu->scope] >= 0 && target[pmu->scope] < nr_cpu_ids)
|
||||
+ perf_pmu_migrate_context(pmu, cpu, target[pmu->scope]);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void perf_event_exit_cpu_context(int cpu)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
@@ -13769,6 +13889,11 @@ static void perf_event_exit_cpu_context(
|
||||
|
||||
// XXX simplify cpuctx->online
|
||||
mutex_lock(&pmus_lock);
|
||||
+ /*
|
||||
+ * Clear the cpumasks, and migrate to other CPUs if possible.
|
||||
+ * Must be invoked before the __perf_event_exit_context.
|
||||
+ */
|
||||
+ perf_event_clear_cpumask(cpu);
|
||||
cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
|
||||
ctx = &cpuctx->ctx;
|
||||
|
||||
@@ -13776,7 +13901,6 @@ static void perf_event_exit_cpu_context(
|
||||
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
|
||||
cpuctx->online = 0;
|
||||
mutex_unlock(&ctx->mutex);
|
||||
- cpumask_clear_cpu(cpu, perf_online_mask);
|
||||
mutex_unlock(&pmus_lock);
|
||||
}
|
||||
#else
|
||||
@@ -13785,6 +13909,42 @@ static void perf_event_exit_cpu_context(
|
||||
|
||||
#endif
|
||||
|
||||
+static void perf_event_setup_cpumask(unsigned int cpu)
|
||||
+{
|
||||
+ struct cpumask *pmu_cpumask;
|
||||
+ unsigned int scope;
|
||||
+
|
||||
+ cpumask_set_cpu(cpu, perf_online_mask);
|
||||
+
|
||||
+ /*
|
||||
+ * Early boot stage, the cpumask hasn't been set yet.
|
||||
+ * The perf_online_<domain>_masks includes the first CPU of each domain.
|
||||
+ * Always unconditionally set the boot CPU for the perf_online_<domain>_masks.
|
||||
+ */
|
||||
+ if (!topology_sibling_cpumask(cpu)) {
|
||||
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
|
||||
+ pmu_cpumask = perf_scope_cpumask(scope);
|
||||
+ if (WARN_ON_ONCE(!pmu_cpumask))
|
||||
+ continue;
|
||||
+ cpumask_set_cpu(cpu, pmu_cpumask);
|
||||
+ }
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
|
||||
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(scope, cpu);
|
||||
+
|
||||
+ pmu_cpumask = perf_scope_cpumask(scope);
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!pmu_cpumask || !cpumask))
|
||||
+ continue;
|
||||
+
|
||||
+ if (!cpumask_empty(cpumask) &&
|
||||
+ cpumask_any_and(pmu_cpumask, cpumask) >= nr_cpu_ids)
|
||||
+ cpumask_set_cpu(cpu, pmu_cpumask);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
int perf_event_init_cpu(unsigned int cpu)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
@@ -13793,7 +13953,7 @@ int perf_event_init_cpu(unsigned int cpu
|
||||
perf_swevent_init_cpu(cpu);
|
||||
|
||||
mutex_lock(&pmus_lock);
|
||||
- cpumask_set_cpu(cpu, perf_online_mask);
|
||||
+ perf_event_setup_cpumask(cpu);
|
||||
cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
|
||||
ctx = &cpuctx->ctx;
|
||||
|
71
debian/patches/patchset-pf/amd-rapl/0002-perf-Add-PERF_EV_CAP_READ_SCOPE.patch
vendored
Normal file
71
debian/patches/patchset-pf/amd-rapl/0002-perf-Add-PERF_EV_CAP_READ_SCOPE.patch
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
From 8c7eb17e722a6a45c4436e5debb9336089b21d9b Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:38 -0700
|
||||
Subject: perf: Add PERF_EV_CAP_READ_SCOPE
|
||||
|
||||
Usually, an event can be read from any CPU of the scope. It doesn't need
|
||||
to be read from the advertised CPU.
|
||||
|
||||
Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with
|
||||
scope can be read from any active CPU in the scope.
|
||||
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
---
|
||||
include/linux/perf_event.h | 3 +++
|
||||
kernel/events/core.c | 14 +++++++++++---
|
||||
2 files changed, 14 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/perf_event.h
|
||||
+++ b/include/linux/perf_event.h
|
||||
@@ -633,10 +633,13 @@ typedef void (*perf_overflow_handler_t)(
|
||||
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
|
||||
* cannot be a group leader. If an event with this flag is detached from the
|
||||
* group it is scheduled out and moved into an unrecoverable ERROR state.
|
||||
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
|
||||
+ * PMU scope where it is active.
|
||||
*/
|
||||
#define PERF_EV_CAP_SOFTWARE BIT(0)
|
||||
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
|
||||
#define PERF_EV_CAP_SIBLING BIT(2)
|
||||
+#define PERF_EV_CAP_READ_SCOPE BIT(3)
|
||||
|
||||
#define SWEVENT_HLIST_BITS 8
|
||||
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
|
||||
--- a/kernel/events/core.c
|
||||
+++ b/kernel/events/core.c
|
||||
@@ -4477,16 +4477,24 @@ struct perf_read_data {
|
||||
int ret;
|
||||
};
|
||||
|
||||
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
|
||||
+
|
||||
static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
|
||||
{
|
||||
+ int local_cpu = smp_processor_id();
|
||||
u16 local_pkg, event_pkg;
|
||||
|
||||
if ((unsigned)event_cpu >= nr_cpu_ids)
|
||||
return event_cpu;
|
||||
|
||||
- if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
|
||||
- int local_cpu = smp_processor_id();
|
||||
+ if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
|
||||
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
|
||||
+
|
||||
+ if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
|
||||
+ return local_cpu;
|
||||
+ }
|
||||
|
||||
+ if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
|
||||
event_pkg = topology_physical_package_id(event_cpu);
|
||||
local_pkg = topology_physical_package_id(local_cpu);
|
||||
|
||||
@@ -11824,7 +11832,7 @@ static int perf_try_init_event(struct pm
|
||||
if (cpu >= nr_cpu_ids)
|
||||
ret = -ENODEV;
|
||||
else
|
||||
- event->cpu = cpu;
|
||||
+ event->event_caps |= PERF_EV_CAP_READ_SCOPE;
|
||||
} else {
|
||||
ret = -ENODEV;
|
||||
}
|
286
debian/patches/patchset-pf/amd-rapl/0003-perf-x86-intel-cstate-Clean-up-cpumask-and-hotplug.patch
vendored
Normal file
286
debian/patches/patchset-pf/amd-rapl/0003-perf-x86-intel-cstate-Clean-up-cpumask-and-hotplug.patch
vendored
Normal file
@ -0,0 +1,286 @@
|
||||
From 09c1529eb102b486220c35546f2663ca858a2943 Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:39 -0700
|
||||
Subject: perf/x86/intel/cstate: Clean up cpumask and hotplug
|
||||
|
||||
There are three cstate PMUs with different scopes, core, die and module.
|
||||
The scopes are supported by the generic perf_event subsystem now.
|
||||
|
||||
Set the scope for each PMU and remove all the cpumask and hotplug code.
|
||||
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
---
|
||||
arch/x86/events/intel/cstate.c | 142 ++-------------------------------
|
||||
include/linux/cpuhotplug.h | 2 -
|
||||
2 files changed, 5 insertions(+), 139 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/intel/cstate.c
|
||||
+++ b/arch/x86/events/intel/cstate.c
|
||||
@@ -128,10 +128,6 @@ static ssize_t __cstate_##_var##_show(st
|
||||
static struct device_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
|
||||
|
||||
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
|
||||
- struct device_attribute *attr,
|
||||
- char *buf);
|
||||
-
|
||||
/* Model -> events mapping */
|
||||
struct cstate_model {
|
||||
unsigned long core_events;
|
||||
@@ -206,22 +202,9 @@ static struct attribute_group cstate_for
|
||||
.attrs = cstate_format_attrs,
|
||||
};
|
||||
|
||||
-static cpumask_t cstate_core_cpu_mask;
|
||||
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
|
||||
-
|
||||
-static struct attribute *cstate_cpumask_attrs[] = {
|
||||
- &dev_attr_cpumask.attr,
|
||||
- NULL,
|
||||
-};
|
||||
-
|
||||
-static struct attribute_group cpumask_attr_group = {
|
||||
- .attrs = cstate_cpumask_attrs,
|
||||
-};
|
||||
-
|
||||
static const struct attribute_group *cstate_attr_groups[] = {
|
||||
&cstate_events_attr_group,
|
||||
&cstate_format_attr_group,
|
||||
- &cpumask_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -269,8 +252,6 @@ static struct perf_msr pkg_msr[] = {
|
||||
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
|
||||
};
|
||||
|
||||
-static cpumask_t cstate_pkg_cpu_mask;
|
||||
-
|
||||
/* cstate_module PMU */
|
||||
static struct pmu cstate_module_pmu;
|
||||
static bool has_cstate_module;
|
||||
@@ -291,28 +272,9 @@ static struct perf_msr module_msr[] = {
|
||||
[PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr },
|
||||
};
|
||||
|
||||
-static cpumask_t cstate_module_cpu_mask;
|
||||
-
|
||||
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
|
||||
- struct device_attribute *attr,
|
||||
- char *buf)
|
||||
-{
|
||||
- struct pmu *pmu = dev_get_drvdata(dev);
|
||||
-
|
||||
- if (pmu == &cstate_core_pmu)
|
||||
- return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
|
||||
- else if (pmu == &cstate_pkg_pmu)
|
||||
- return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
|
||||
- else if (pmu == &cstate_module_pmu)
|
||||
- return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
|
||||
- else
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static int cstate_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config;
|
||||
- int cpu;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
@@ -331,20 +293,13 @@ static int cstate_pmu_event_init(struct
|
||||
if (!(core_msr_mask & (1 << cfg)))
|
||||
return -EINVAL;
|
||||
event->hw.event_base = core_msr[cfg].msr;
|
||||
- cpu = cpumask_any_and(&cstate_core_cpu_mask,
|
||||
- topology_sibling_cpumask(event->cpu));
|
||||
} else if (event->pmu == &cstate_pkg_pmu) {
|
||||
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
|
||||
if (!(pkg_msr_mask & (1 << cfg)))
|
||||
return -EINVAL;
|
||||
-
|
||||
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
|
||||
-
|
||||
event->hw.event_base = pkg_msr[cfg].msr;
|
||||
- cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
|
||||
- topology_die_cpumask(event->cpu));
|
||||
} else if (event->pmu == &cstate_module_pmu) {
|
||||
if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
@@ -352,16 +307,10 @@ static int cstate_pmu_event_init(struct
|
||||
if (!(module_msr_mask & (1 << cfg)))
|
||||
return -EINVAL;
|
||||
event->hw.event_base = module_msr[cfg].msr;
|
||||
- cpu = cpumask_any_and(&cstate_module_cpu_mask,
|
||||
- topology_cluster_cpumask(event->cpu));
|
||||
} else {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
- if (cpu >= nr_cpu_ids)
|
||||
- return -ENODEV;
|
||||
-
|
||||
- event->cpu = cpu;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = -1;
|
||||
return 0;
|
||||
@@ -412,84 +361,6 @@ static int cstate_pmu_event_add(struct p
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/*
|
||||
- * Check if exiting cpu is the designated reader. If so migrate the
|
||||
- * events when there is a valid target available
|
||||
- */
|
||||
-static int cstate_cpu_exit(unsigned int cpu)
|
||||
-{
|
||||
- unsigned int target;
|
||||
-
|
||||
- if (has_cstate_core &&
|
||||
- cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
|
||||
-
|
||||
- target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
|
||||
- /* Migrate events if there is a valid target */
|
||||
- if (target < nr_cpu_ids) {
|
||||
- cpumask_set_cpu(target, &cstate_core_cpu_mask);
|
||||
- perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- if (has_cstate_pkg &&
|
||||
- cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
|
||||
-
|
||||
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
|
||||
- /* Migrate events if there is a valid target */
|
||||
- if (target < nr_cpu_ids) {
|
||||
- cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
|
||||
- perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- if (has_cstate_module &&
|
||||
- cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
|
||||
-
|
||||
- target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
|
||||
- /* Migrate events if there is a valid target */
|
||||
- if (target < nr_cpu_ids) {
|
||||
- cpumask_set_cpu(target, &cstate_module_cpu_mask);
|
||||
- perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
|
||||
- }
|
||||
- }
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int cstate_cpu_init(unsigned int cpu)
|
||||
-{
|
||||
- unsigned int target;
|
||||
-
|
||||
- /*
|
||||
- * If this is the first online thread of that core, set it in
|
||||
- * the core cpu mask as the designated reader.
|
||||
- */
|
||||
- target = cpumask_any_and(&cstate_core_cpu_mask,
|
||||
- topology_sibling_cpumask(cpu));
|
||||
-
|
||||
- if (has_cstate_core && target >= nr_cpu_ids)
|
||||
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
|
||||
-
|
||||
- /*
|
||||
- * If this is the first online thread of that package, set it
|
||||
- * in the package cpu mask as the designated reader.
|
||||
- */
|
||||
- target = cpumask_any_and(&cstate_pkg_cpu_mask,
|
||||
- topology_die_cpumask(cpu));
|
||||
- if (has_cstate_pkg && target >= nr_cpu_ids)
|
||||
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
|
||||
-
|
||||
- /*
|
||||
- * If this is the first online thread of that cluster, set it
|
||||
- * in the cluster cpu mask as the designated reader.
|
||||
- */
|
||||
- target = cpumask_any_and(&cstate_module_cpu_mask,
|
||||
- topology_cluster_cpumask(cpu));
|
||||
- if (has_cstate_module && target >= nr_cpu_ids)
|
||||
- cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static const struct attribute_group *core_attr_update[] = {
|
||||
&group_cstate_core_c1,
|
||||
&group_cstate_core_c3,
|
||||
@@ -526,6 +397,7 @@ static struct pmu cstate_core_pmu = {
|
||||
.stop = cstate_pmu_event_stop,
|
||||
.read = cstate_pmu_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
|
||||
+ .scope = PERF_PMU_SCOPE_CORE,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
@@ -541,6 +413,7 @@ static struct pmu cstate_pkg_pmu = {
|
||||
.stop = cstate_pmu_event_stop,
|
||||
.read = cstate_pmu_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
|
||||
+ .scope = PERF_PMU_SCOPE_PKG,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
@@ -556,6 +429,7 @@ static struct pmu cstate_module_pmu = {
|
||||
.stop = cstate_pmu_event_stop,
|
||||
.read = cstate_pmu_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
|
||||
+ .scope = PERF_PMU_SCOPE_CLUSTER,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
@@ -810,9 +684,6 @@ static int __init cstate_probe(const str
|
||||
|
||||
static inline void cstate_cleanup(void)
|
||||
{
|
||||
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
|
||||
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
|
||||
-
|
||||
if (has_cstate_core)
|
||||
perf_pmu_unregister(&cstate_core_pmu);
|
||||
|
||||
@@ -827,11 +698,6 @@ static int __init cstate_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
|
||||
- "perf/x86/cstate:starting", cstate_cpu_init, NULL);
|
||||
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
|
||||
- "perf/x86/cstate:online", NULL, cstate_cpu_exit);
|
||||
-
|
||||
if (has_cstate_core) {
|
||||
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
|
||||
if (err) {
|
||||
@@ -844,6 +710,8 @@ static int __init cstate_init(void)
|
||||
|
||||
if (has_cstate_pkg) {
|
||||
if (topology_max_dies_per_package() > 1) {
|
||||
+ /* CLX-AP is multi-die and the cstate is die-scope */
|
||||
+ cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE;
|
||||
err = perf_pmu_register(&cstate_pkg_pmu,
|
||||
"cstate_die", -1);
|
||||
} else {
|
||||
--- a/include/linux/cpuhotplug.h
|
||||
+++ b/include/linux/cpuhotplug.h
|
||||
@@ -152,7 +152,6 @@ enum cpuhp_state {
|
||||
CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
|
||||
CPUHP_AP_PERF_X86_STARTING,
|
||||
CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
|
||||
- CPUHP_AP_PERF_X86_CSTATE_STARTING,
|
||||
CPUHP_AP_PERF_XTENSA_STARTING,
|
||||
CPUHP_AP_ARM_VFP_STARTING,
|
||||
CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
|
||||
@@ -209,7 +208,6 @@ enum cpuhp_state {
|
||||
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
|
||||
CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
- CPUHP_AP_PERF_X86_CSTATE_ONLINE,
|
||||
CPUHP_AP_PERF_S390_CF_ONLINE,
|
||||
CPUHP_AP_PERF_S390_SF_ONLINE,
|
||||
CPUHP_AP_PERF_ARM_CCI_ONLINE,
|
188
debian/patches/patchset-pf/amd-rapl/0004-iommu-vt-d-Clean-up-cpumask-and-hotplug-for-perfmon.patch
vendored
Normal file
188
debian/patches/patchset-pf/amd-rapl/0004-iommu-vt-d-Clean-up-cpumask-and-hotplug-for-perfmon.patch
vendored
Normal file
@ -0,0 +1,188 @@
|
||||
From f91da33af8295b4b3d73a2083225f69e1d5ff301 Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:40 -0700
|
||||
Subject: iommu/vt-d: Clean up cpumask and hotplug for perfmon
|
||||
|
||||
The iommu PMU is system-wide scope, which is supported by the generic
|
||||
perf_event subsystem now.
|
||||
|
||||
Set the scope for the iommu PMU and remove all the cpumask and hotplug
|
||||
code.
|
||||
|
||||
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
Cc: David Woodhouse <dwmw2@infradead.org>
|
||||
Cc: Joerg Roedel <joro@8bytes.org>
|
||||
Cc: Will Deacon <will@kernel.org>
|
||||
Cc: iommu@lists.linux.dev
|
||||
---
|
||||
drivers/iommu/intel/iommu.h | 2 -
|
||||
drivers/iommu/intel/perfmon.c | 111 +---------------------------------
|
||||
2 files changed, 2 insertions(+), 111 deletions(-)
|
||||
|
||||
--- a/drivers/iommu/intel/iommu.h
|
||||
+++ b/drivers/iommu/intel/iommu.h
|
||||
@@ -687,8 +687,6 @@ struct iommu_pmu {
|
||||
DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
|
||||
struct perf_event *event_list[IOMMU_PMU_IDX_MAX];
|
||||
unsigned char irq_name[16];
|
||||
- struct hlist_node cpuhp_node;
|
||||
- int cpu;
|
||||
};
|
||||
|
||||
#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED)
|
||||
--- a/drivers/iommu/intel/perfmon.c
|
||||
+++ b/drivers/iommu/intel/perfmon.c
|
||||
@@ -34,28 +34,9 @@ static struct attribute_group iommu_pmu_
|
||||
.attrs = attrs_empty,
|
||||
};
|
||||
|
||||
-static cpumask_t iommu_pmu_cpu_mask;
|
||||
-
|
||||
-static ssize_t
|
||||
-cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
-{
|
||||
- return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
|
||||
-}
|
||||
-static DEVICE_ATTR_RO(cpumask);
|
||||
-
|
||||
-static struct attribute *iommu_pmu_cpumask_attrs[] = {
|
||||
- &dev_attr_cpumask.attr,
|
||||
- NULL
|
||||
-};
|
||||
-
|
||||
-static struct attribute_group iommu_pmu_cpumask_attr_group = {
|
||||
- .attrs = iommu_pmu_cpumask_attrs,
|
||||
-};
|
||||
-
|
||||
static const struct attribute_group *iommu_pmu_attr_groups[] = {
|
||||
&iommu_pmu_format_attr_group,
|
||||
&iommu_pmu_events_attr_group,
|
||||
- &iommu_pmu_cpumask_attr_group,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -565,6 +546,7 @@ static int __iommu_pmu_register(struct i
|
||||
iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups;
|
||||
iommu_pmu->pmu.attr_update = iommu_pmu_attr_update;
|
||||
iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
+ iommu_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
|
||||
iommu_pmu->pmu.module = THIS_MODULE;
|
||||
|
||||
return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
|
||||
@@ -773,89 +755,6 @@ static void iommu_pmu_unset_interrupt(st
|
||||
iommu->perf_irq = 0;
|
||||
}
|
||||
|
||||
-static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
|
||||
-{
|
||||
- struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
|
||||
-
|
||||
- if (cpumask_empty(&iommu_pmu_cpu_mask))
|
||||
- cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
|
||||
-
|
||||
- if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
|
||||
- iommu_pmu->cpu = cpu;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
|
||||
-{
|
||||
- struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
|
||||
- int target = cpumask_first(&iommu_pmu_cpu_mask);
|
||||
-
|
||||
- /*
|
||||
- * The iommu_pmu_cpu_mask has been updated when offline the CPU
|
||||
- * for the first iommu_pmu. Migrate the other iommu_pmu to the
|
||||
- * new target.
|
||||
- */
|
||||
- if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
|
||||
- perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
|
||||
- iommu_pmu->cpu = target;
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
|
||||
- return 0;
|
||||
-
|
||||
- target = cpumask_any_but(cpu_online_mask, cpu);
|
||||
-
|
||||
- if (target < nr_cpu_ids)
|
||||
- cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
|
||||
- else
|
||||
- return 0;
|
||||
-
|
||||
- perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
|
||||
- iommu_pmu->cpu = target;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int nr_iommu_pmu;
|
||||
-static enum cpuhp_state iommu_cpuhp_slot;
|
||||
-
|
||||
-static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
|
||||
-{
|
||||
- int ret;
|
||||
-
|
||||
- if (!nr_iommu_pmu) {
|
||||
- ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
|
||||
- "driver/iommu/intel/perfmon:online",
|
||||
- iommu_pmu_cpu_online,
|
||||
- iommu_pmu_cpu_offline);
|
||||
- if (ret < 0)
|
||||
- return ret;
|
||||
- iommu_cpuhp_slot = ret;
|
||||
- }
|
||||
-
|
||||
- ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
|
||||
- if (ret) {
|
||||
- if (!nr_iommu_pmu)
|
||||
- cpuhp_remove_multi_state(iommu_cpuhp_slot);
|
||||
- return ret;
|
||||
- }
|
||||
- nr_iommu_pmu++;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
|
||||
-{
|
||||
- cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
|
||||
-
|
||||
- if (--nr_iommu_pmu)
|
||||
- return;
|
||||
-
|
||||
- cpuhp_remove_multi_state(iommu_cpuhp_slot);
|
||||
-}
|
||||
-
|
||||
void iommu_pmu_register(struct intel_iommu *iommu)
|
||||
{
|
||||
struct iommu_pmu *iommu_pmu = iommu->pmu;
|
||||
@@ -866,17 +765,12 @@ void iommu_pmu_register(struct intel_iom
|
||||
if (__iommu_pmu_register(iommu))
|
||||
goto err;
|
||||
|
||||
- if (iommu_pmu_cpuhp_setup(iommu_pmu))
|
||||
- goto unregister;
|
||||
-
|
||||
/* Set interrupt for overflow */
|
||||
if (iommu_pmu_set_interrupt(iommu))
|
||||
- goto cpuhp_free;
|
||||
+ goto unregister;
|
||||
|
||||
return;
|
||||
|
||||
-cpuhp_free:
|
||||
- iommu_pmu_cpuhp_free(iommu_pmu);
|
||||
unregister:
|
||||
perf_pmu_unregister(&iommu_pmu->pmu);
|
||||
err:
|
||||
@@ -892,6 +786,5 @@ void iommu_pmu_unregister(struct intel_i
|
||||
return;
|
||||
|
||||
iommu_pmu_unset_interrupt(iommu);
|
||||
- iommu_pmu_cpuhp_free(iommu_pmu);
|
||||
perf_pmu_unregister(&iommu_pmu->pmu);
|
||||
}
|
238
debian/patches/patchset-pf/amd-rapl/0005-dmaengine-idxd-Clean-up-cpumask-and-hotplug-for-perf.patch
vendored
Normal file
238
debian/patches/patchset-pf/amd-rapl/0005-dmaengine-idxd-Clean-up-cpumask-and-hotplug-for-perf.patch
vendored
Normal file
@ -0,0 +1,238 @@
|
||||
From 76278bd3946d618ead2d9cc22612a75a4ab99ace Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:41 -0700
|
||||
Subject: dmaengine: idxd: Clean up cpumask and hotplug for perfmon
|
||||
|
||||
The idxd PMU is system-wide scope, which is supported by the generic
|
||||
perf_event subsystem now.
|
||||
|
||||
Set the scope for the idxd PMU and remove all the cpumask and hotplug
|
||||
code.
|
||||
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
Cc: Fenghua Yu <fenghua.yu@intel.com>
|
||||
Cc: Dave Jiang <dave.jiang@intel.com>
|
||||
Cc: Vinod Koul <vkoul@kernel.org>
|
||||
Cc: dmaengine@vger.kernel.org
|
||||
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
|
||||
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
|
||||
---
|
||||
drivers/dma/idxd/idxd.h | 7 ---
|
||||
drivers/dma/idxd/init.c | 3 --
|
||||
drivers/dma/idxd/perfmon.c | 98 +-------------------------------------
|
||||
3 files changed, 1 insertion(+), 107 deletions(-)
|
||||
|
||||
--- a/drivers/dma/idxd/idxd.h
|
||||
+++ b/drivers/dma/idxd/idxd.h
|
||||
@@ -124,7 +124,6 @@ struct idxd_pmu {
|
||||
|
||||
struct pmu pmu;
|
||||
char name[IDXD_NAME_SIZE];
|
||||
- int cpu;
|
||||
|
||||
int n_counters;
|
||||
int counter_width;
|
||||
@@ -135,8 +134,6 @@ struct idxd_pmu {
|
||||
|
||||
unsigned long supported_filters;
|
||||
int n_filters;
|
||||
-
|
||||
- struct hlist_node cpuhp_node;
|
||||
};
|
||||
|
||||
#define IDXD_MAX_PRIORITY 0xf
|
||||
@@ -803,14 +800,10 @@ void idxd_user_counter_increment(struct
|
||||
int perfmon_pmu_init(struct idxd_device *idxd);
|
||||
void perfmon_pmu_remove(struct idxd_device *idxd);
|
||||
void perfmon_counter_overflow(struct idxd_device *idxd);
|
||||
-void perfmon_init(void);
|
||||
-void perfmon_exit(void);
|
||||
#else
|
||||
static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
|
||||
static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
|
||||
static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
|
||||
-static inline void perfmon_init(void) {}
|
||||
-static inline void perfmon_exit(void) {}
|
||||
#endif
|
||||
|
||||
/* debugfs */
|
||||
--- a/drivers/dma/idxd/init.c
|
||||
+++ b/drivers/dma/idxd/init.c
|
||||
@@ -878,8 +878,6 @@ static int __init idxd_init_module(void)
|
||||
else
|
||||
support_enqcmd = true;
|
||||
|
||||
- perfmon_init();
|
||||
-
|
||||
err = idxd_driver_register(&idxd_drv);
|
||||
if (err < 0)
|
||||
goto err_idxd_driver_register;
|
||||
@@ -928,7 +926,6 @@ static void __exit idxd_exit_module(void
|
||||
idxd_driver_unregister(&idxd_drv);
|
||||
pci_unregister_driver(&idxd_pci_driver);
|
||||
idxd_cdev_remove();
|
||||
- perfmon_exit();
|
||||
idxd_remove_debugfs();
|
||||
}
|
||||
module_exit(idxd_exit_module);
|
||||
--- a/drivers/dma/idxd/perfmon.c
|
||||
+++ b/drivers/dma/idxd/perfmon.c
|
||||
@@ -6,29 +6,6 @@
|
||||
#include "idxd.h"
|
||||
#include "perfmon.h"
|
||||
|
||||
-static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
|
||||
- char *buf);
|
||||
-
|
||||
-static cpumask_t perfmon_dsa_cpu_mask;
|
||||
-static bool cpuhp_set_up;
|
||||
-static enum cpuhp_state cpuhp_slot;
|
||||
-
|
||||
-/*
|
||||
- * perf userspace reads this attribute to determine which cpus to open
|
||||
- * counters on. It's connected to perfmon_dsa_cpu_mask, which is
|
||||
- * maintained by the cpu hotplug handlers.
|
||||
- */
|
||||
-static DEVICE_ATTR_RO(cpumask);
|
||||
-
|
||||
-static struct attribute *perfmon_cpumask_attrs[] = {
|
||||
- &dev_attr_cpumask.attr,
|
||||
- NULL,
|
||||
-};
|
||||
-
|
||||
-static struct attribute_group cpumask_attr_group = {
|
||||
- .attrs = perfmon_cpumask_attrs,
|
||||
-};
|
||||
-
|
||||
/*
|
||||
* These attributes specify the bits in the config word that the perf
|
||||
* syscall uses to pass the event ids and categories to perfmon.
|
||||
@@ -67,16 +44,9 @@ static struct attribute_group perfmon_fo
|
||||
|
||||
static const struct attribute_group *perfmon_attr_groups[] = {
|
||||
&perfmon_format_attr_group,
|
||||
- &cpumask_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
-static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
|
||||
- char *buf)
|
||||
-{
|
||||
- return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
|
||||
-}
|
||||
-
|
||||
static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
|
||||
{
|
||||
return &idxd_pmu->pmu == event->pmu;
|
||||
@@ -217,7 +187,6 @@ static int perfmon_pmu_event_init(struct
|
||||
return -EINVAL;
|
||||
|
||||
event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
|
||||
- event->cpu = idxd->idxd_pmu->cpu;
|
||||
event->hw.config = event->attr.config;
|
||||
|
||||
if (event->group_leader != event)
|
||||
@@ -488,6 +457,7 @@ static void idxd_pmu_init(struct idxd_pm
|
||||
idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
|
||||
idxd_pmu->pmu.read = perfmon_pmu_event_update;
|
||||
idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
+ idxd_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
|
||||
idxd_pmu->pmu.module = THIS_MODULE;
|
||||
}
|
||||
|
||||
@@ -496,47 +466,11 @@ void perfmon_pmu_remove(struct idxd_devi
|
||||
if (!idxd->idxd_pmu)
|
||||
return;
|
||||
|
||||
- cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
|
||||
perf_pmu_unregister(&idxd->idxd_pmu->pmu);
|
||||
kfree(idxd->idxd_pmu);
|
||||
idxd->idxd_pmu = NULL;
|
||||
}
|
||||
|
||||
-static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
|
||||
-{
|
||||
- struct idxd_pmu *idxd_pmu;
|
||||
-
|
||||
- idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
|
||||
-
|
||||
- /* select the first online CPU as the designated reader */
|
||||
- if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
|
||||
- cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
|
||||
- idxd_pmu->cpu = cpu;
|
||||
- }
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
|
||||
-{
|
||||
- struct idxd_pmu *idxd_pmu;
|
||||
- unsigned int target;
|
||||
-
|
||||
- idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
|
||||
-
|
||||
- if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
|
||||
- return 0;
|
||||
-
|
||||
- target = cpumask_any_but(cpu_online_mask, cpu);
|
||||
- /* migrate events if there is a valid target */
|
||||
- if (target < nr_cpu_ids) {
|
||||
- cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
|
||||
- perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
|
||||
- }
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
int perfmon_pmu_init(struct idxd_device *idxd)
|
||||
{
|
||||
union idxd_perfcap perfcap;
|
||||
@@ -544,12 +478,6 @@ int perfmon_pmu_init(struct idxd_device
|
||||
int rc = -ENODEV;
|
||||
|
||||
/*
|
||||
- * perfmon module initialization failed, nothing to do
|
||||
- */
|
||||
- if (!cpuhp_set_up)
|
||||
- return -ENODEV;
|
||||
-
|
||||
- /*
|
||||
* If perfmon_offset or num_counters is 0, it means perfmon is
|
||||
* not supported on this hardware.
|
||||
*/
|
||||
@@ -624,11 +552,6 @@ int perfmon_pmu_init(struct idxd_device
|
||||
if (rc)
|
||||
goto free;
|
||||
|
||||
- rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
|
||||
- if (rc) {
|
||||
- perf_pmu_unregister(&idxd->idxd_pmu->pmu);
|
||||
- goto free;
|
||||
- }
|
||||
out:
|
||||
return rc;
|
||||
free:
|
||||
@@ -637,22 +560,3 @@ free:
|
||||
|
||||
goto out;
|
||||
}
|
||||
-
|
||||
-void __init perfmon_init(void)
|
||||
-{
|
||||
- int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
|
||||
- "driver/dma/idxd/perf:online",
|
||||
- perf_event_cpu_online,
|
||||
- perf_event_cpu_offline);
|
||||
- if (WARN_ON(rc < 0))
|
||||
- return;
|
||||
-
|
||||
- cpuhp_slot = rc;
|
||||
- cpuhp_set_up = true;
|
||||
-}
|
||||
-
|
||||
-void __exit perfmon_exit(void)
|
||||
-{
|
||||
- if (cpuhp_set_up)
|
||||
- cpuhp_remove_multi_state(cpuhp_slot);
|
||||
-}
|
@ -0,0 +1,84 @@
|
||||
From fb0a3b5932882f02ed42fcaa6db73aba3eafd6d7 Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:42 -0700
|
||||
Subject: perf/x86/rapl: Move the pmu allocation out of CPU hotplug
|
||||
|
||||
The rapl pmu just needs to be allocated once. It doesn't matter whether it is
|
||||
allocated at each CPU hotplug or in the global init_rapl_pmus().
|
||||
|
||||
Move the pmu allocation to the init_rapl_pmus(). So the generic hotplug
|
||||
support can be applied.
|
||||
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 44 +++++++++++++++++++++++++++++-------------
|
||||
1 file changed, 31 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -568,19 +568,8 @@ static int rapl_cpu_online(unsigned int
|
||||
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
int target;
|
||||
|
||||
- if (!pmu) {
|
||||
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
- if (!pmu)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- raw_spin_lock_init(&pmu->lock);
|
||||
- INIT_LIST_HEAD(&pmu->active_list);
|
||||
- pmu->pmu = &rapl_pmus->pmu;
|
||||
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
- rapl_hrtimer_init(pmu);
|
||||
-
|
||||
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
|
||||
- }
|
||||
+ if (!pmu)
|
||||
+ return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Check if there is an online cpu in the package which collects rapl
|
||||
@@ -673,6 +662,32 @@ static const struct attribute_group *rap
|
||||
NULL,
|
||||
};
|
||||
|
||||
+static void __init init_rapl_pmu(void)
|
||||
+{
|
||||
+ struct rapl_pmu *pmu;
|
||||
+ int cpu;
|
||||
+
|
||||
+ cpus_read_lock();
|
||||
+
|
||||
+ for_each_cpu(cpu, cpu_online_mask) {
|
||||
+ pmu = cpu_to_rapl_pmu(cpu);
|
||||
+ if (pmu)
|
||||
+ continue;
|
||||
+ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
+ if (!pmu)
|
||||
+ continue;
|
||||
+ raw_spin_lock_init(&pmu->lock);
|
||||
+ INIT_LIST_HEAD(&pmu->active_list);
|
||||
+ pmu->pmu = &rapl_pmus->pmu;
|
||||
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
+ rapl_hrtimer_init(pmu);
|
||||
+
|
||||
+ rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
|
||||
+ }
|
||||
+
|
||||
+ cpus_read_unlock();
|
||||
+}
|
||||
+
|
||||
static int __init init_rapl_pmus(void)
|
||||
{
|
||||
int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
|
||||
@@ -693,6 +708,9 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.read = rapl_pmu_event_read;
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
+
|
||||
+ init_rapl_pmu();
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
179
debian/patches/patchset-pf/amd-rapl/0007-perf-x86-rapl-Clean-up-cpumask-and-hotplug.patch
vendored
Normal file
179
debian/patches/patchset-pf/amd-rapl/0007-perf-x86-rapl-Clean-up-cpumask-and-hotplug.patch
vendored
Normal file
@ -0,0 +1,179 @@
|
||||
From 7b4f6ba1b1dc5f3120652bcb5921a697d5167bff Mon Sep 17 00:00:00 2001
|
||||
From: Kan Liang <kan.liang@linux.intel.com>
|
||||
Date: Fri, 2 Aug 2024 08:16:43 -0700
|
||||
Subject: perf/x86/rapl: Clean up cpumask and hotplug
|
||||
|
||||
The rapl pmu is die scope, which is supported by the generic perf_event
|
||||
subsystem now.
|
||||
|
||||
Set the scope for the rapl PMU and remove all the cpumask and hotplug
|
||||
code.
|
||||
|
||||
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
|
||||
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 80 +-------------------------------------
|
||||
include/linux/cpuhotplug.h | 1 -
|
||||
2 files changed, 2 insertions(+), 79 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -135,7 +135,6 @@ struct rapl_model {
|
||||
/* 1/2^hw_unit Joule */
|
||||
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
|
||||
static struct rapl_pmus *rapl_pmus;
|
||||
-static cpumask_t rapl_cpu_mask;
|
||||
static unsigned int rapl_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
@@ -340,8 +339,6 @@ static int rapl_pmu_event_init(struct pe
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
|
||||
-
|
||||
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -360,7 +357,6 @@ static int rapl_pmu_event_init(struct pe
|
||||
pmu = cpu_to_rapl_pmu(event->cpu);
|
||||
if (!pmu)
|
||||
return -EINVAL;
|
||||
- event->cpu = pmu->cpu;
|
||||
event->pmu_private = pmu;
|
||||
event->hw.event_base = rapl_msrs[bit].msr;
|
||||
event->hw.config = cfg;
|
||||
@@ -374,23 +370,6 @@ static void rapl_pmu_event_read(struct p
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
|
||||
- struct device_attribute *attr, char *buf)
|
||||
-{
|
||||
- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
|
||||
-}
|
||||
-
|
||||
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
|
||||
-
|
||||
-static struct attribute *rapl_pmu_attrs[] = {
|
||||
- &dev_attr_cpumask.attr,
|
||||
- NULL,
|
||||
-};
|
||||
-
|
||||
-static struct attribute_group rapl_pmu_attr_group = {
|
||||
- .attrs = rapl_pmu_attrs,
|
||||
-};
|
||||
-
|
||||
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
|
||||
@@ -438,7 +417,6 @@ static struct attribute_group rapl_pmu_f
|
||||
};
|
||||
|
||||
static const struct attribute_group *rapl_attr_groups[] = {
|
||||
- &rapl_pmu_attr_group,
|
||||
&rapl_pmu_format_group,
|
||||
&rapl_pmu_events_group,
|
||||
NULL,
|
||||
@@ -541,49 +519,6 @@ static struct perf_msr amd_rapl_msrs[] =
|
||||
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
|
||||
};
|
||||
|
||||
-static int rapl_cpu_offline(unsigned int cpu)
|
||||
-{
|
||||
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
- int target;
|
||||
-
|
||||
- /* Check if exiting cpu is used for collecting rapl events */
|
||||
- if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
|
||||
- return 0;
|
||||
-
|
||||
- pmu->cpu = -1;
|
||||
- /* Find a new cpu to collect rapl events */
|
||||
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
|
||||
-
|
||||
- /* Migrate rapl events to the new target */
|
||||
- if (target < nr_cpu_ids) {
|
||||
- cpumask_set_cpu(target, &rapl_cpu_mask);
|
||||
- pmu->cpu = target;
|
||||
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
|
||||
- }
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int rapl_cpu_online(unsigned int cpu)
|
||||
-{
|
||||
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
- int target;
|
||||
-
|
||||
- if (!pmu)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- /*
|
||||
- * Check if there is an online cpu in the package which collects rapl
|
||||
- * events already.
|
||||
- */
|
||||
- target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
|
||||
- if (target < nr_cpu_ids)
|
||||
- return 0;
|
||||
-
|
||||
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
|
||||
- pmu->cpu = cpu;
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
{
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
@@ -707,6 +642,7 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
|
||||
rapl_pmus->pmu.read = rapl_pmu_event_read;
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
+ rapl_pmus->pmu.scope = PERF_PMU_SCOPE_DIE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
|
||||
init_rapl_pmu();
|
||||
@@ -857,24 +793,13 @@ static int __init rapl_pmu_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- /*
|
||||
- * Install callbacks. Core will call them for each online cpu.
|
||||
- */
|
||||
- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
- "perf/x86/rapl:online",
|
||||
- rapl_cpu_online, rapl_cpu_offline);
|
||||
- if (ret)
|
||||
- goto out;
|
||||
-
|
||||
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
|
||||
if (ret)
|
||||
- goto out1;
|
||||
+ goto out;
|
||||
|
||||
rapl_advertise();
|
||||
return 0;
|
||||
|
||||
-out1:
|
||||
- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
|
||||
out:
|
||||
pr_warn("Initialization failed (%d), disabled\n", ret);
|
||||
cleanup_rapl_pmus();
|
||||
@@ -884,7 +809,6 @@ module_init(rapl_pmu_init);
|
||||
|
||||
static void __exit intel_rapl_exit(void)
|
||||
{
|
||||
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
|
||||
perf_pmu_unregister(&rapl_pmus->pmu);
|
||||
cleanup_rapl_pmus();
|
||||
}
|
||||
--- a/include/linux/cpuhotplug.h
|
||||
+++ b/include/linux/cpuhotplug.h
|
||||
@@ -207,7 +207,6 @@ enum cpuhp_state {
|
||||
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
|
||||
- CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
CPUHP_AP_PERF_S390_CF_ONLINE,
|
||||
CPUHP_AP_PERF_S390_SF_ONLINE,
|
||||
CPUHP_AP_PERF_ARM_CCI_ONLINE,
|
101
debian/patches/patchset-pf/amd-rapl/0008-perf-x86-rapl-Fix-the-energy-pkg-event-for-AMD-CPUs.patch
vendored
Normal file
101
debian/patches/patchset-pf/amd-rapl/0008-perf-x86-rapl-Fix-the-energy-pkg-event-for-AMD-CPUs.patch
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
From f1525664ff9da3241b3556594dc0b67506ae1ddd Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Tue, 10 Sep 2024 14:25:05 +0530
|
||||
Subject: perf/x86/rapl: Fix the energy-pkg event for AMD CPUs
|
||||
|
||||
After commit ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf"),
|
||||
on AMD processors that support extended CPUID leaf 0x80000026, the
|
||||
topology_die_cpumask() and topology_logical_die_id() macros no longer
|
||||
return the package cpumask and package id, instead they return the CCD
|
||||
(Core Complex Die) mask and id respectively. This leads to the energy-pkg
|
||||
event scope to be modified to CCD instead of package.
|
||||
|
||||
So, change the PMU scope for AMD and Hygon back to package.
|
||||
|
||||
On a 12 CCD 1 Package AMD Zen4 Genoa machine:
|
||||
|
||||
Before:
|
||||
$ cat /sys/devices/power/cpumask
|
||||
0,8,16,24,32,40,48,56,64,72,80,88.
|
||||
|
||||
The expected cpumask here is supposed to be just "0", as it is a package
|
||||
scope event, only one CPU will be collecting the event for all the CPUs in
|
||||
the package.
|
||||
|
||||
After:
|
||||
$ cat /sys/devices/power/cpumask
|
||||
0
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 35 ++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 32 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -139,9 +139,32 @@ static unsigned int rapl_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
|
||||
+/*
|
||||
+ * RAPL Package energy counter scope:
|
||||
+ * 1. AMD/HYGON platforms have a per-PKG package energy counter
|
||||
+ * 2. For Intel platforms
|
||||
+ * 2.1. CLX-AP is multi-die and its RAPL MSRs are die-scope
|
||||
+ * 2.2. Other Intel platforms are single die systems so the scope can be
|
||||
+ * considered as either pkg-scope or die-scope, and we are considering
|
||||
+ * them as die-scope.
|
||||
+ */
|
||||
+#define rapl_pmu_is_pkg_scope() \
|
||||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
|
||||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
|
||||
+
|
||||
+/*
|
||||
+ * Helper function to get the correct topology id according to the
|
||||
+ * RAPL PMU scope.
|
||||
+ */
|
||||
+static inline unsigned int get_rapl_pmu_idx(int cpu)
|
||||
+{
|
||||
+ return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
|
||||
+ topology_logical_die_id(cpu);
|
||||
+}
|
||||
+
|
||||
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
|
||||
{
|
||||
- unsigned int rapl_pmu_idx = topology_logical_die_id(cpu);
|
||||
+ unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
|
||||
|
||||
/*
|
||||
* The unsigned check also catches the '-1' return value for non
|
||||
@@ -617,7 +640,7 @@ static void __init init_rapl_pmu(void)
|
||||
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
|
||||
+ rapl_pmus->pmus[get_rapl_pmu_idx(cpu)] = pmu;
|
||||
}
|
||||
|
||||
cpus_read_unlock();
|
||||
@@ -626,6 +649,12 @@ static void __init init_rapl_pmu(void)
|
||||
static int __init init_rapl_pmus(void)
|
||||
{
|
||||
int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
|
||||
+ int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
+
|
||||
+ if (rapl_pmu_is_pkg_scope()) {
|
||||
+ nr_rapl_pmu = topology_max_packages();
|
||||
+ rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
+ }
|
||||
|
||||
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
|
||||
if (!rapl_pmus)
|
||||
@@ -641,8 +670,8 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.start = rapl_pmu_event_start;
|
||||
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
|
||||
rapl_pmus->pmu.read = rapl_pmu_event_read;
|
||||
+ rapl_pmus->pmu.scope = rapl_pmu_scope;
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
- rapl_pmus->pmu.scope = PERF_PMU_SCOPE_DIE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
|
||||
init_rapl_pmu();
|
77
debian/patches/patchset-pf/amd-rapl/0009-x86-topology-Introduce-topology_logical_core_id.patch
vendored
Normal file
77
debian/patches/patchset-pf/amd-rapl/0009-x86-topology-Introduce-topology_logical_core_id.patch
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
From 9439067951f4d857272836b35812af26650d9c16 Mon Sep 17 00:00:00 2001
|
||||
From: K Prateek Nayak <kprateek.nayak@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:21:41 +0000
|
||||
Subject: x86/topology: Introduce topology_logical_core_id()
|
||||
|
||||
On x86, topology_core_id() returns a unique core ID within the PKG
|
||||
domain. Looking at match_smt() suggests that a core ID just needs to be
|
||||
unique within a LLC domain. For use cases such as the per-core RAPL PMU,
|
||||
there exists a need for a unique core ID across the entire system with
|
||||
multiple PKG domains. Introduce topology_logical_core_id() to derive a
|
||||
unique core ID across the system.
|
||||
|
||||
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
|
||||
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
|
||||
---
|
||||
Documentation/arch/x86/topology.rst | 4 ++++
|
||||
arch/x86/include/asm/processor.h | 1 +
|
||||
arch/x86/include/asm/topology.h | 1 +
|
||||
arch/x86/kernel/cpu/debugfs.c | 1 +
|
||||
arch/x86/kernel/cpu/topology_common.c | 1 +
|
||||
5 files changed, 8 insertions(+)
|
||||
|
||||
--- a/Documentation/arch/x86/topology.rst
|
||||
+++ b/Documentation/arch/x86/topology.rst
|
||||
@@ -135,6 +135,10 @@ Thread-related topology information in t
|
||||
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
|
||||
"core_id."
|
||||
|
||||
+ - topology_logical_core_id();
|
||||
+
|
||||
+ The logical core ID to which a thread belongs.
|
||||
+
|
||||
|
||||
|
||||
System topology examples
|
||||
--- a/arch/x86/include/asm/processor.h
|
||||
+++ b/arch/x86/include/asm/processor.h
|
||||
@@ -98,6 +98,7 @@ struct cpuinfo_topology {
|
||||
// Logical ID mappings
|
||||
u32 logical_pkg_id;
|
||||
u32 logical_die_id;
|
||||
+ u32 logical_core_id;
|
||||
|
||||
// AMD Node ID and Nodes per Package info
|
||||
u32 amd_node_id;
|
||||
--- a/arch/x86/include/asm/topology.h
|
||||
+++ b/arch/x86/include/asm/topology.h
|
||||
@@ -137,6 +137,7 @@ extern const struct cpumask *cpu_cluster
|
||||
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
|
||||
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
|
||||
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
|
||||
+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
|
||||
#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
|
||||
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
|
||||
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
|
||||
--- a/arch/x86/kernel/cpu/debugfs.c
|
||||
+++ b/arch/x86/kernel/cpu/debugfs.c
|
||||
@@ -24,6 +24,7 @@ static int cpu_debug_show(struct seq_fil
|
||||
seq_printf(m, "core_id: %u\n", c->topo.core_id);
|
||||
seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id);
|
||||
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
|
||||
+ seq_printf(m, "logical_core_id: %u\n", c->topo.logical_core_id);
|
||||
seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
|
||||
seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id);
|
||||
seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
|
||||
--- a/arch/x86/kernel/cpu/topology_common.c
|
||||
+++ b/arch/x86/kernel/cpu/topology_common.c
|
||||
@@ -151,6 +151,7 @@ static void topo_set_ids(struct topo_sca
|
||||
if (!early) {
|
||||
c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
|
||||
c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
|
||||
+ c->topo.logical_core_id = topology_get_logical_id(apicid, TOPO_CORE_DOMAIN);
|
||||
}
|
||||
|
||||
/* Package relative core ID */
|
87
debian/patches/patchset-pf/amd-rapl/0010-perf-x86-rapl-Remove-the-cpu_to_rapl_pmu-function.patch
vendored
Normal file
87
debian/patches/patchset-pf/amd-rapl/0010-perf-x86-rapl-Remove-the-cpu_to_rapl_pmu-function.patch
vendored
Normal file
@ -0,0 +1,87 @@
|
||||
From b8e1231d5f78314de8f9066baba7b1fdd5e59218 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:21:42 +0000
|
||||
Subject: perf/x86/rapl: Remove the cpu_to_rapl_pmu() function
|
||||
|
||||
Preparation for the addition of per-core RAPL energy counter support for
|
||||
AMD CPUs. After that, one CPU might be mapped to more than one rapl_pmu
|
||||
(package/die one or per-core one). It also makes sense to use the
|
||||
get_rapl_pmu_idx() helper, which is already used to index into the
|
||||
rapl_pmus->pmus[] array.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 29 +++++++++++++----------------
|
||||
1 file changed, 13 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -162,17 +162,6 @@ static inline unsigned int get_rapl_pmu_
|
||||
topology_logical_die_id(cpu);
|
||||
}
|
||||
|
||||
-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
|
||||
-{
|
||||
- unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
|
||||
-
|
||||
- /*
|
||||
- * The unsigned check also catches the '-1' return value for non
|
||||
- * existent mappings in the topology map.
|
||||
- */
|
||||
- return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
|
||||
-}
|
||||
-
|
||||
static inline u64 rapl_read_counter(struct perf_event *event)
|
||||
{
|
||||
u64 raw;
|
||||
@@ -348,7 +337,7 @@ static void rapl_pmu_event_del(struct pe
|
||||
static int rapl_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
|
||||
- int bit, ret = 0;
|
||||
+ int bit, rapl_pmu_idx, ret = 0;
|
||||
struct rapl_pmu *pmu;
|
||||
|
||||
/* only look at RAPL events */
|
||||
@@ -376,8 +365,12 @@ static int rapl_pmu_event_init(struct pe
|
||||
if (event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
+ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
|
||||
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
/* must be done before validate_group */
|
||||
- pmu = cpu_to_rapl_pmu(event->cpu);
|
||||
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
|
||||
if (!pmu)
|
||||
return -EINVAL;
|
||||
event->pmu_private = pmu;
|
||||
@@ -623,12 +616,16 @@ static const struct attribute_group *rap
|
||||
static void __init init_rapl_pmu(void)
|
||||
{
|
||||
struct rapl_pmu *pmu;
|
||||
- int cpu;
|
||||
+ int cpu, rapl_pmu_idx;
|
||||
|
||||
cpus_read_lock();
|
||||
|
||||
for_each_cpu(cpu, cpu_online_mask) {
|
||||
- pmu = cpu_to_rapl_pmu(cpu);
|
||||
+ rapl_pmu_idx = get_rapl_pmu_idx(cpu);
|
||||
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
|
||||
+ continue;
|
||||
+
|
||||
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
|
||||
if (pmu)
|
||||
continue;
|
||||
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
@@ -640,7 +637,7 @@ static void __init init_rapl_pmu(void)
|
||||
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
- rapl_pmus->pmus[get_rapl_pmu_idx(cpu)] = pmu;
|
||||
+ rapl_pmus->pmus[rapl_pmu_idx] = pmu;
|
||||
}
|
||||
|
||||
cpus_read_unlock();
|
240
debian/patches/patchset-pf/amd-rapl/0011-perf-x86-rapl-Rename-rapl_pmu-variables.patch
vendored
Normal file
240
debian/patches/patchset-pf/amd-rapl/0011-perf-x86-rapl-Rename-rapl_pmu-variables.patch
vendored
Normal file
@ -0,0 +1,240 @@
|
||||
From 07ec9f38cac6eb6e5b0b062ef99e9458ba567de8 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:21:43 +0000
|
||||
Subject: perf/x86/rapl: Rename rapl_pmu variables
|
||||
|
||||
Rename struct rapl_pmu variables from "pmu" to "rapl_pmu", to
|
||||
avoid any confusion between the variables of two different
|
||||
structs pmu and rapl_pmu. As rapl_pmu also contains a pointer to
|
||||
struct pmu, which leads to situations in code like pmu->pmu,
|
||||
which is needlessly confusing. The above scenario is replaced with
|
||||
much more readable rapl_pmu->pmu with this change.
|
||||
|
||||
Also rename "pmus" member in rapl_pmus struct, for same reason.
|
||||
|
||||
No functional change.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 93 +++++++++++++++++++++---------------------
|
||||
1 file changed, 47 insertions(+), 46 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -116,7 +116,7 @@ struct rapl_pmu {
|
||||
struct rapl_pmus {
|
||||
struct pmu pmu;
|
||||
unsigned int nr_rapl_pmu;
|
||||
- struct rapl_pmu *pmus[] __counted_by(nr_rapl_pmu);
|
||||
+ struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
|
||||
};
|
||||
|
||||
enum rapl_unit_quirk {
|
||||
@@ -223,34 +223,34 @@ static void rapl_start_hrtimer(struct ra
|
||||
|
||||
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
|
||||
{
|
||||
- struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
|
||||
+ struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
|
||||
struct perf_event *event;
|
||||
unsigned long flags;
|
||||
|
||||
- if (!pmu->n_active)
|
||||
+ if (!rapl_pmu->n_active)
|
||||
return HRTIMER_NORESTART;
|
||||
|
||||
- raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
|
||||
- list_for_each_entry(event, &pmu->active_list, active_entry)
|
||||
+ list_for_each_entry(event, &rapl_pmu->active_list, active_entry)
|
||||
rapl_event_update(event);
|
||||
|
||||
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
|
||||
- hrtimer_forward_now(hrtimer, pmu->timer_interval);
|
||||
+ hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval);
|
||||
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
|
||||
+static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
|
||||
{
|
||||
- struct hrtimer *hr = &pmu->hrtimer;
|
||||
+ struct hrtimer *hr = &rapl_pmu->hrtimer;
|
||||
|
||||
hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hr->function = rapl_hrtimer_handle;
|
||||
}
|
||||
|
||||
-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
|
||||
+static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
||||
@@ -258,39 +258,39 @@ static void __rapl_pmu_event_start(struc
|
||||
|
||||
event->hw.state = 0;
|
||||
|
||||
- list_add_tail(&event->active_entry, &pmu->active_list);
|
||||
+ list_add_tail(&event->active_entry, &rapl_pmu->active_list);
|
||||
|
||||
local64_set(&event->hw.prev_count, rapl_read_counter(event));
|
||||
|
||||
- pmu->n_active++;
|
||||
- if (pmu->n_active == 1)
|
||||
- rapl_start_hrtimer(pmu);
|
||||
+ rapl_pmu->n_active++;
|
||||
+ if (rapl_pmu->n_active == 1)
|
||||
+ rapl_start_hrtimer(rapl_pmu);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_start(struct perf_event *event, int mode)
|
||||
{
|
||||
- struct rapl_pmu *pmu = event->pmu_private;
|
||||
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
|
||||
unsigned long flags;
|
||||
|
||||
- raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
- __rapl_pmu_event_start(pmu, event);
|
||||
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
+ __rapl_pmu_event_start(rapl_pmu, event);
|
||||
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
- struct rapl_pmu *pmu = event->pmu_private;
|
||||
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
- raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
|
||||
/* mark event as deactivated and stopped */
|
||||
if (!(hwc->state & PERF_HES_STOPPED)) {
|
||||
- WARN_ON_ONCE(pmu->n_active <= 0);
|
||||
- pmu->n_active--;
|
||||
- if (pmu->n_active == 0)
|
||||
- hrtimer_cancel(&pmu->hrtimer);
|
||||
+ WARN_ON_ONCE(rapl_pmu->n_active <= 0);
|
||||
+ rapl_pmu->n_active--;
|
||||
+ if (rapl_pmu->n_active == 0)
|
||||
+ hrtimer_cancel(&rapl_pmu->hrtimer);
|
||||
|
||||
list_del(&event->active_entry);
|
||||
|
||||
@@ -308,23 +308,23 @@ static void rapl_pmu_event_stop(struct p
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
}
|
||||
|
||||
static int rapl_pmu_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
- struct rapl_pmu *pmu = event->pmu_private;
|
||||
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
- raw_spin_lock_irqsave(&pmu->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
|
||||
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
if (mode & PERF_EF_START)
|
||||
- __rapl_pmu_event_start(pmu, event);
|
||||
+ __rapl_pmu_event_start(rapl_pmu, event);
|
||||
|
||||
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -338,7 +338,7 @@ static int rapl_pmu_event_init(struct pe
|
||||
{
|
||||
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
|
||||
int bit, rapl_pmu_idx, ret = 0;
|
||||
- struct rapl_pmu *pmu;
|
||||
+ struct rapl_pmu *rapl_pmu;
|
||||
|
||||
/* only look at RAPL events */
|
||||
if (event->attr.type != rapl_pmus->pmu.type)
|
||||
@@ -370,10 +370,11 @@ static int rapl_pmu_event_init(struct pe
|
||||
return -EINVAL;
|
||||
|
||||
/* must be done before validate_group */
|
||||
- pmu = rapl_pmus->pmus[rapl_pmu_idx];
|
||||
- if (!pmu)
|
||||
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
|
||||
+ if (!rapl_pmu)
|
||||
return -EINVAL;
|
||||
- event->pmu_private = pmu;
|
||||
+
|
||||
+ event->pmu_private = rapl_pmu;
|
||||
event->hw.event_base = rapl_msrs[bit].msr;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = bit;
|
||||
@@ -600,7 +601,7 @@ static void cleanup_rapl_pmus(void)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
|
||||
- kfree(rapl_pmus->pmus[i]);
|
||||
+ kfree(rapl_pmus->rapl_pmu[i]);
|
||||
kfree(rapl_pmus);
|
||||
}
|
||||
|
||||
@@ -615,7 +616,7 @@ static const struct attribute_group *rap
|
||||
|
||||
static void __init init_rapl_pmu(void)
|
||||
{
|
||||
- struct rapl_pmu *pmu;
|
||||
+ struct rapl_pmu *rapl_pmu;
|
||||
int cpu, rapl_pmu_idx;
|
||||
|
||||
cpus_read_lock();
|
||||
@@ -625,19 +626,19 @@ static void __init init_rapl_pmu(void)
|
||||
if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
|
||||
continue;
|
||||
|
||||
- pmu = rapl_pmus->pmus[rapl_pmu_idx];
|
||||
- if (pmu)
|
||||
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
|
||||
+ if (rapl_pmu)
|
||||
continue;
|
||||
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
- if (!pmu)
|
||||
+ rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
+ if (!rapl_pmu)
|
||||
continue;
|
||||
- raw_spin_lock_init(&pmu->lock);
|
||||
- INIT_LIST_HEAD(&pmu->active_list);
|
||||
- pmu->pmu = &rapl_pmus->pmu;
|
||||
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
- rapl_hrtimer_init(pmu);
|
||||
+ raw_spin_lock_init(&rapl_pmu->lock);
|
||||
+ INIT_LIST_HEAD(&rapl_pmu->active_list);
|
||||
+ rapl_pmu->pmu = &rapl_pmus->pmu;
|
||||
+ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
+ rapl_hrtimer_init(rapl_pmu);
|
||||
|
||||
- rapl_pmus->pmus[rapl_pmu_idx] = pmu;
|
||||
+ rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu;
|
||||
}
|
||||
|
||||
cpus_read_unlock();
|
||||
@@ -653,7 +654,7 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
}
|
||||
|
||||
- rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
|
||||
+ rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
|
||||
if (!rapl_pmus)
|
||||
return -ENOMEM;
|
||||
|
75
debian/patches/patchset-pf/amd-rapl/0012-perf-x86-rapl-Make-rapl_model-struct-global.patch
vendored
Normal file
75
debian/patches/patchset-pf/amd-rapl/0012-perf-x86-rapl-Make-rapl_model-struct-global.patch
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
From 68614752b9fd6b6bae6f9ab7b02fc28350c5a541 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:47:56 +0000
|
||||
Subject: perf/x86/rapl: Make rapl_model struct global
|
||||
|
||||
Preparation for per-core energy counter support addition for AMD CPUs.
|
||||
|
||||
As there will always be just one rapl_model variable on a system, make it
|
||||
global, to make it easier to access it from any function.
|
||||
|
||||
No functional change.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 16 ++++++++--------
|
||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -138,6 +138,7 @@ static struct rapl_pmus *rapl_pmus;
|
||||
static unsigned int rapl_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
+static struct rapl_model *rapl_model;
|
||||
|
||||
/*
|
||||
* RAPL Package energy counter scope:
|
||||
@@ -536,18 +537,18 @@ static struct perf_msr amd_rapl_msrs[] =
|
||||
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
|
||||
};
|
||||
|
||||
-static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
+static int rapl_check_hw_unit(void)
|
||||
{
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
int i;
|
||||
|
||||
/* protect rdmsrl() to handle virtualization */
|
||||
- if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
|
||||
+ if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
|
||||
return -1;
|
||||
for (i = 0; i < NR_RAPL_DOMAINS; i++)
|
||||
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
|
||||
- switch (rm->unit_quirk) {
|
||||
+ switch (rapl_model->unit_quirk) {
|
||||
/*
|
||||
* DRAM domain on HSW server and KNL has fixed energy unit which can be
|
||||
* different than the unit from power unit MSR. See
|
||||
@@ -798,21 +799,20 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
|
||||
static int __init rapl_pmu_init(void)
|
||||
{
|
||||
const struct x86_cpu_id *id;
|
||||
- struct rapl_model *rm;
|
||||
int ret;
|
||||
|
||||
id = x86_match_cpu(rapl_model_match);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
|
||||
- rm = (struct rapl_model *) id->driver_data;
|
||||
+ rapl_model = (struct rapl_model *) id->driver_data;
|
||||
|
||||
- rapl_msrs = rm->rapl_msrs;
|
||||
+ rapl_msrs = rapl_model->rapl_msrs;
|
||||
|
||||
rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
|
||||
- false, (void *) &rm->events);
|
||||
+ false, (void *) &rapl_model->events);
|
||||
|
||||
- ret = rapl_check_hw_unit(rm);
|
||||
+ ret = rapl_check_hw_unit();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
112
debian/patches/patchset-pf/amd-rapl/0013-perf-x86-rapl-Add-arguments-to-the-cleanup-and-init-.patch
vendored
Normal file
112
debian/patches/patchset-pf/amd-rapl/0013-perf-x86-rapl-Add-arguments-to-the-cleanup-and-init-.patch
vendored
Normal file
@ -0,0 +1,112 @@
|
||||
From b10b887510ccb0b6bc7294888982b862703c9c32 Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:47:57 +0000
|
||||
Subject: perf/x86/rapl: Add arguments to the cleanup and init functions
|
||||
|
||||
Prep for per-core RAPL PMU addition.
|
||||
|
||||
No functional change.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 32 +++++++++++++++++++-------------
|
||||
1 file changed, 19 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -597,7 +597,7 @@ static void __init rapl_advertise(void)
|
||||
}
|
||||
}
|
||||
|
||||
-static void cleanup_rapl_pmus(void)
|
||||
+static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -615,7 +615,7 @@ static const struct attribute_group *rap
|
||||
NULL,
|
||||
};
|
||||
|
||||
-static void __init init_rapl_pmu(void)
|
||||
+static void __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
|
||||
{
|
||||
struct rapl_pmu *rapl_pmu;
|
||||
int cpu, rapl_pmu_idx;
|
||||
@@ -645,20 +645,22 @@ static void __init init_rapl_pmu(void)
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
-static int __init init_rapl_pmus(void)
|
||||
+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope)
|
||||
{
|
||||
- int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
|
||||
- int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
+ int nr_rapl_pmu;
|
||||
+ struct rapl_pmus *rapl_pmus;
|
||||
|
||||
- if (rapl_pmu_is_pkg_scope()) {
|
||||
- nr_rapl_pmu = topology_max_packages();
|
||||
- rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
- }
|
||||
+ if (rapl_pmu_scope == PERF_PMU_SCOPE_PKG)
|
||||
+ nr_rapl_pmu = topology_max_packages();
|
||||
+ else
|
||||
+ nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
|
||||
|
||||
rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
|
||||
if (!rapl_pmus)
|
||||
return -ENOMEM;
|
||||
|
||||
+ *rapl_pmus_ptr = rapl_pmus;
|
||||
+
|
||||
rapl_pmus->nr_rapl_pmu = nr_rapl_pmu;
|
||||
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
|
||||
rapl_pmus->pmu.attr_update = rapl_attr_update;
|
||||
@@ -673,7 +675,7 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
|
||||
- init_rapl_pmu();
|
||||
+ init_rapl_pmu(rapl_pmus);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -799,8 +801,12 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
|
||||
static int __init rapl_pmu_init(void)
|
||||
{
|
||||
const struct x86_cpu_id *id;
|
||||
+ int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
int ret;
|
||||
|
||||
+ if (rapl_pmu_is_pkg_scope())
|
||||
+ rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
+
|
||||
id = x86_match_cpu(rapl_model_match);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
@@ -816,7 +822,7 @@ static int __init rapl_pmu_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- ret = init_rapl_pmus();
|
||||
+ ret = init_rapl_pmus(&rapl_pmus, rapl_pmu_scope);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -829,7 +835,7 @@ static int __init rapl_pmu_init(void)
|
||||
|
||||
out:
|
||||
pr_warn("Initialization failed (%d), disabled\n", ret);
|
||||
- cleanup_rapl_pmus();
|
||||
+ cleanup_rapl_pmus(rapl_pmus);
|
||||
return ret;
|
||||
}
|
||||
module_init(rapl_pmu_init);
|
||||
@@ -837,6 +843,6 @@ module_init(rapl_pmu_init);
|
||||
static void __exit intel_rapl_exit(void)
|
||||
{
|
||||
perf_pmu_unregister(&rapl_pmus->pmu);
|
||||
- cleanup_rapl_pmus();
|
||||
+ cleanup_rapl_pmus(rapl_pmus);
|
||||
}
|
||||
module_exit(intel_rapl_exit);
|
358
debian/patches/patchset-pf/amd-rapl/0014-perf-x86-rapl-Modify-the-generic-variable-names-to-_.patch
vendored
Normal file
358
debian/patches/patchset-pf/amd-rapl/0014-perf-x86-rapl-Modify-the-generic-variable-names-to-_.patch
vendored
Normal file
@ -0,0 +1,358 @@
|
||||
From b5c83c40540298a39f8314034b705f1236b17a9f Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:47:58 +0000
|
||||
Subject: perf/x86/rapl: Modify the generic variable names to *_pkg*
|
||||
|
||||
Prep for addition of power_per_core PMU to handle core scope energy
|
||||
consumption for AMD CPUs.
|
||||
|
||||
Replace the generic names with *_pkg*, to differentiate between the
|
||||
scopes of the two different PMUs and their variables.
|
||||
|
||||
No functional change.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 118 ++++++++++++++++++++---------------------
|
||||
1 file changed, 59 insertions(+), 59 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -70,18 +70,18 @@ MODULE_LICENSE("GPL");
|
||||
/*
|
||||
* RAPL energy status counters
|
||||
*/
|
||||
-enum perf_rapl_events {
|
||||
+enum perf_rapl_pkg_events {
|
||||
PERF_RAPL_PP0 = 0, /* all cores */
|
||||
PERF_RAPL_PKG, /* entire package */
|
||||
PERF_RAPL_RAM, /* DRAM */
|
||||
PERF_RAPL_PP1, /* gpu */
|
||||
PERF_RAPL_PSYS, /* psys */
|
||||
|
||||
- PERF_RAPL_MAX,
|
||||
- NR_RAPL_DOMAINS = PERF_RAPL_MAX,
|
||||
+ PERF_RAPL_PKG_EVENTS_MAX,
|
||||
+ NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
|
||||
};
|
||||
|
||||
-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
|
||||
+static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
|
||||
"pp0-core",
|
||||
"package",
|
||||
"dram",
|
||||
@@ -126,16 +126,16 @@ enum rapl_unit_quirk {
|
||||
};
|
||||
|
||||
struct rapl_model {
|
||||
- struct perf_msr *rapl_msrs;
|
||||
- unsigned long events;
|
||||
+ struct perf_msr *rapl_pkg_msrs;
|
||||
+ unsigned long pkg_events;
|
||||
unsigned int msr_power_unit;
|
||||
enum rapl_unit_quirk unit_quirk;
|
||||
};
|
||||
|
||||
/* 1/2^hw_unit Joule */
|
||||
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
|
||||
-static struct rapl_pmus *rapl_pmus;
|
||||
-static unsigned int rapl_cntr_mask;
|
||||
+static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
|
||||
+static struct rapl_pmus *rapl_pmus_pkg;
|
||||
+static unsigned int rapl_pkg_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
static struct rapl_model *rapl_model;
|
||||
@@ -149,7 +149,7 @@ static struct rapl_model *rapl_model;
|
||||
* considered as either pkg-scope or die-scope, and we are considering
|
||||
* them as die-scope.
|
||||
*/
|
||||
-#define rapl_pmu_is_pkg_scope() \
|
||||
+#define rapl_pkg_pmu_is_pkg_scope() \
|
||||
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
|
||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
|
||||
|
||||
@@ -159,7 +159,7 @@ static struct rapl_model *rapl_model;
|
||||
*/
|
||||
static inline unsigned int get_rapl_pmu_idx(int cpu)
|
||||
{
|
||||
- return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
|
||||
+ return rapl_pkg_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
|
||||
topology_logical_die_id(cpu);
|
||||
}
|
||||
|
||||
@@ -172,7 +172,7 @@ static inline u64 rapl_read_counter(stru
|
||||
|
||||
static inline u64 rapl_scale(u64 v, int cfg)
|
||||
{
|
||||
- if (cfg > NR_RAPL_DOMAINS) {
|
||||
+ if (cfg > NR_RAPL_PKG_DOMAINS) {
|
||||
pr_warn("Invalid domain %d, failed to scale data\n", cfg);
|
||||
return v;
|
||||
}
|
||||
@@ -182,7 +182,7 @@ static inline u64 rapl_scale(u64 v, int
|
||||
* or use ldexp(count, -32).
|
||||
* Watts = Joules/Time delta
|
||||
*/
|
||||
- return v << (32 - rapl_hw_unit[cfg - 1]);
|
||||
+ return v << (32 - rapl_pkg_hw_unit[cfg - 1]);
|
||||
}
|
||||
|
||||
static u64 rapl_event_update(struct perf_event *event)
|
||||
@@ -342,7 +342,7 @@ static int rapl_pmu_event_init(struct pe
|
||||
struct rapl_pmu *rapl_pmu;
|
||||
|
||||
/* only look at RAPL events */
|
||||
- if (event->attr.type != rapl_pmus->pmu.type)
|
||||
+ if (event->attr.type != rapl_pmus_pkg->pmu.type)
|
||||
return -ENOENT;
|
||||
|
||||
/* check only supported bits are set */
|
||||
@@ -352,14 +352,14 @@ static int rapl_pmu_event_init(struct pe
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
- if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
|
||||
+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
|
||||
return -EINVAL;
|
||||
|
||||
- cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
|
||||
+ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
|
||||
bit = cfg - 1;
|
||||
|
||||
/* check event supported */
|
||||
- if (!(rapl_cntr_mask & (1 << bit)))
|
||||
+ if (!(rapl_pkg_cntr_mask & (1 << bit)))
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
@@ -367,11 +367,11 @@ static int rapl_pmu_event_init(struct pe
|
||||
return -EINVAL;
|
||||
|
||||
rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
|
||||
- if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
|
||||
+ if (rapl_pmu_idx >= rapl_pmus_pkg->nr_rapl_pmu)
|
||||
return -EINVAL;
|
||||
|
||||
/* must be done before validate_group */
|
||||
- rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
|
||||
+ rapl_pmu = rapl_pmus_pkg->rapl_pmu[rapl_pmu_idx];
|
||||
if (!rapl_pmu)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -525,11 +525,11 @@ static struct perf_msr intel_rapl_spr_ms
|
||||
};
|
||||
|
||||
/*
|
||||
- * Force to PERF_RAPL_MAX size due to:
|
||||
- * - perf_msr_probe(PERF_RAPL_MAX)
|
||||
+ * Force to PERF_RAPL_PKG_EVENTS_MAX size due to:
|
||||
+ * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX)
|
||||
* - want to use same event codes across both architectures
|
||||
*/
|
||||
-static struct perf_msr amd_rapl_msrs[] = {
|
||||
+static struct perf_msr amd_rapl_pkg_msrs[] = {
|
||||
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 },
|
||||
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
|
||||
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 },
|
||||
@@ -545,8 +545,8 @@ static int rapl_check_hw_unit(void)
|
||||
/* protect rdmsrl() to handle virtualization */
|
||||
if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
|
||||
return -1;
|
||||
- for (i = 0; i < NR_RAPL_DOMAINS; i++)
|
||||
- rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
|
||||
+ rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
|
||||
switch (rapl_model->unit_quirk) {
|
||||
/*
|
||||
@@ -556,11 +556,11 @@ static int rapl_check_hw_unit(void)
|
||||
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
|
||||
*/
|
||||
case RAPL_UNIT_QUIRK_INTEL_HSW:
|
||||
- rapl_hw_unit[PERF_RAPL_RAM] = 16;
|
||||
+ rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16;
|
||||
break;
|
||||
/* SPR uses a fixed energy unit for Psys domain. */
|
||||
case RAPL_UNIT_QUIRK_INTEL_SPR:
|
||||
- rapl_hw_unit[PERF_RAPL_PSYS] = 0;
|
||||
+ rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -575,9 +575,9 @@ static int rapl_check_hw_unit(void)
|
||||
* if hw unit is 32, then we use 2 ms 1/200/2
|
||||
*/
|
||||
rapl_timer_ms = 2;
|
||||
- if (rapl_hw_unit[0] < 32) {
|
||||
+ if (rapl_pkg_hw_unit[0] < 32) {
|
||||
rapl_timer_ms = (1000 / (2 * 100));
|
||||
- rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
|
||||
+ rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -587,12 +587,12 @@ static void __init rapl_advertise(void)
|
||||
int i;
|
||||
|
||||
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
|
||||
- hweight32(rapl_cntr_mask), rapl_timer_ms);
|
||||
+ hweight32(rapl_pkg_cntr_mask), rapl_timer_ms);
|
||||
|
||||
- for (i = 0; i < NR_RAPL_DOMAINS; i++) {
|
||||
- if (rapl_cntr_mask & (1 << i)) {
|
||||
+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
|
||||
+ if (rapl_pkg_cntr_mask & (1 << i)) {
|
||||
pr_info("hw unit of domain %s 2^-%d Joules\n",
|
||||
- rapl_domain_names[i], rapl_hw_unit[i]);
|
||||
+ rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -681,71 +681,71 @@ static int __init init_rapl_pmus(struct
|
||||
}
|
||||
|
||||
static struct rapl_model model_snb = {
|
||||
- .events = BIT(PERF_RAPL_PP0) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_PP1),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_snbep = {
|
||||
- .events = BIT(PERF_RAPL_PP0) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_hsw = {
|
||||
- .events = BIT(PERF_RAPL_PP0) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM) |
|
||||
BIT(PERF_RAPL_PP1),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_hsx = {
|
||||
- .events = BIT(PERF_RAPL_PP0) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_knl = {
|
||||
- .events = BIT(PERF_RAPL_PKG) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_skl = {
|
||||
- .events = BIT(PERF_RAPL_PP0) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM) |
|
||||
BIT(PERF_RAPL_PP1) |
|
||||
BIT(PERF_RAPL_PSYS),
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_spr = {
|
||||
- .events = BIT(PERF_RAPL_PP0) |
|
||||
+ .pkg_events = BIT(PERF_RAPL_PP0) |
|
||||
BIT(PERF_RAPL_PKG) |
|
||||
BIT(PERF_RAPL_RAM) |
|
||||
BIT(PERF_RAPL_PSYS),
|
||||
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
|
||||
.msr_power_unit = MSR_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = intel_rapl_spr_msrs,
|
||||
+ .rapl_pkg_msrs = intel_rapl_spr_msrs,
|
||||
};
|
||||
|
||||
static struct rapl_model model_amd_hygon = {
|
||||
- .events = BIT(PERF_RAPL_PKG),
|
||||
+ .pkg_events = BIT(PERF_RAPL_PKG),
|
||||
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
|
||||
- .rapl_msrs = amd_rapl_msrs,
|
||||
+ .rapl_pkg_msrs = amd_rapl_pkg_msrs,
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id rapl_model_match[] __initconst = {
|
||||
@@ -801,11 +801,11 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
|
||||
static int __init rapl_pmu_init(void)
|
||||
{
|
||||
const struct x86_cpu_id *id;
|
||||
- int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
+ int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
int ret;
|
||||
|
||||
- if (rapl_pmu_is_pkg_scope())
|
||||
- rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
+ if (rapl_pkg_pmu_is_pkg_scope())
|
||||
+ rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
|
||||
id = x86_match_cpu(rapl_model_match);
|
||||
if (!id)
|
||||
@@ -813,20 +813,20 @@ static int __init rapl_pmu_init(void)
|
||||
|
||||
rapl_model = (struct rapl_model *) id->driver_data;
|
||||
|
||||
- rapl_msrs = rapl_model->rapl_msrs;
|
||||
+ rapl_msrs = rapl_model->rapl_pkg_msrs;
|
||||
|
||||
- rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
|
||||
- false, (void *) &rapl_model->events);
|
||||
+ rapl_pkg_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_PKG_EVENTS_MAX,
|
||||
+ false, (void *) &rapl_model->pkg_events);
|
||||
|
||||
ret = rapl_check_hw_unit();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- ret = init_rapl_pmus(&rapl_pmus, rapl_pmu_scope);
|
||||
+ ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
|
||||
+ ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -835,14 +835,14 @@ static int __init rapl_pmu_init(void)
|
||||
|
||||
out:
|
||||
pr_warn("Initialization failed (%d), disabled\n", ret);
|
||||
- cleanup_rapl_pmus(rapl_pmus);
|
||||
+ cleanup_rapl_pmus(rapl_pmus_pkg);
|
||||
return ret;
|
||||
}
|
||||
module_init(rapl_pmu_init);
|
||||
|
||||
static void __exit intel_rapl_exit(void)
|
||||
{
|
||||
- perf_pmu_unregister(&rapl_pmus->pmu);
|
||||
- cleanup_rapl_pmus(rapl_pmus);
|
||||
+ perf_pmu_unregister(&rapl_pmus_pkg->pmu);
|
||||
+ cleanup_rapl_pmus(rapl_pmus_pkg);
|
||||
}
|
||||
module_exit(intel_rapl_exit);
|
@ -0,0 +1,47 @@
|
||||
From dbc0343069c8f86fad0d8d9075f70f79114ef10a Mon Sep 17 00:00:00 2001
|
||||
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
Date: Fri, 13 Sep 2024 15:47:59 +0000
|
||||
Subject: perf/x86/rapl: Remove the global variable rapl_msrs
|
||||
|
||||
After making the rapl_model struct global, the rapl_msrs global
|
||||
variable isn't needed, so remove it.
|
||||
|
||||
Also, it will be cleaner when the new per-core scope PMU is added, as we will
|
||||
need to maintain two rapl_msrs arrays (one for the per-core scope PMU and one for
|
||||
the package scope PMU) inside the rapl_model struct.
|
||||
|
||||
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
|
||||
---
|
||||
arch/x86/events/rapl.c | 7 ++-----
|
||||
1 file changed, 2 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/arch/x86/events/rapl.c
|
||||
+++ b/arch/x86/events/rapl.c
|
||||
@@ -137,7 +137,6 @@ static int rapl_pkg_hw_unit[NR_RAPL_PKG_
|
||||
static struct rapl_pmus *rapl_pmus_pkg;
|
||||
static unsigned int rapl_pkg_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
-static struct perf_msr *rapl_msrs;
|
||||
static struct rapl_model *rapl_model;
|
||||
|
||||
/*
|
||||
@@ -376,7 +375,7 @@ static int rapl_pmu_event_init(struct pe
|
||||
return -EINVAL;
|
||||
|
||||
event->pmu_private = rapl_pmu;
|
||||
- event->hw.event_base = rapl_msrs[bit].msr;
|
||||
+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = bit;
|
||||
|
||||
@@ -813,9 +812,7 @@ static int __init rapl_pmu_init(void)
|
||||
|
||||
rapl_model = (struct rapl_model *) id->driver_data;
|
||||
|
||||
- rapl_msrs = rapl_model->rapl_pkg_msrs;
|
||||
-
|
||||
- rapl_pkg_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_PKG_EVENTS_MAX,
|
||||
+ rapl_pkg_cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, PERF_RAPL_PKG_EVENTS_MAX,
|
||||
false, (void *) &rapl_model->pkg_events);
|
||||
|
||||
ret = rapl_check_hw_unit();
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user